diff options
Diffstat (limited to 'src/mesa/drivers/dri/r600')
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_context.c | 27 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_texstate.c | 13 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.c | 2194 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.h | 195 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_chip.c | 103 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_fragprog.c | 211 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_fragprog.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_render.c | 21 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_shader.c | 9 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_shader.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_vertprog.c | 72 |
11 files changed, 2495 insertions, 359 deletions
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index dbd233729c..25314eff56 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -74,6 +74,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ +//#define R600_ENABLE_GLSL_TEST 1 + #define need_GL_VERSION_2_0 #define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters @@ -109,6 +111,7 @@ static const struct dri_extension card_extensions[] = { {"GL_ARB_texture_env_crossbar", NULL}, {"GL_ARB_texture_env_dot3", NULL}, {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_texture_non_power_of_two", NULL}, {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, @@ -155,7 +158,11 @@ static const struct dri_extension mm_extensions[] = { * functions added by GL_ATI_separate_stencil. */ static const struct dri_extension gl_20_extension[] = { +#ifdef R600_ENABLE_GLSL_TEST + {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, +#else {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, +#endif /* R600_ENABLE_GLSL_TEST */ }; static const struct tnl_pipeline_stage *r600_pipeline[] = { @@ -308,6 +315,26 @@ static void r600InitGLExtensions(GLcontext *ctx) if (r600->radeon.radeonScreen->kernel_mm) driInitExtensions(ctx, mm_extensions, GL_FALSE); +#ifdef R600_ENABLE_GLSL_TEST + driInitExtensions(ctx, gl_20_extension, GL_TRUE); + //_mesa_enable_2_0_extensions(ctx); + //1.5 + ctx->Extensions.ARB_occlusion_query = GL_TRUE; + ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE; + ctx->Extensions.EXT_shadow_funcs = GL_TRUE; + //2.0 + ctx->Extensions.ARB_draw_buffers = GL_TRUE; + ctx->Extensions.ARB_point_sprite = GL_TRUE; + ctx->Extensions.ARB_shader_objects = GL_TRUE; + ctx->Extensions.ARB_vertex_shader = GL_TRUE; + ctx->Extensions.ARB_fragment_shader = GL_TRUE; + ctx->Extensions.EXT_blend_equation_separate = GL_TRUE; + ctx->Extensions.ATI_separate_stencil = GL_TRUE; + + /* glsl compiler has problem if this is not GL_TRUE */ + ctx->Shader.EmitCondCodes = GL_TRUE; +#endif /* R600_ENABLE_GLSL_TEST */ + if (driQueryOptionb (&r600->radeon.optionCache, "disable_stencil_two_side")) _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 4ec315b78c..2a4a6e6ee1 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -917,18 +917,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 67e0ee7746..e84f524525 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -32,12 +32,49 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "shader/prog_parameter.h" #include "radeon_debug.h" #include "r600_context.h" #include "r700_assembler.h" +#define USE_CF_FOR_CONTINUE_BREAK 1 +#define USE_CF_FOR_POP_AFTER 1 + +struct prog_instruction noise1_insts[12] = { + {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0} +}; +float noise1_const[2][4] = { + {0.300000f, 0.900000f, 0.500000f, 0.300000f} +}; + +COMPILED_SUB noise1_presub = { + &(noise1_insts[0]), + 12, + 2, + 1, + 0, + &(noise1_const[0]), + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + {0,0,0}, + 0 +}; + BITS addrmode_PVSDST(PVSDST * pPVSDST) { return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); @@ -327,22 +364,27 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) return(format); } -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) { - if(pAsm->D.dst.op3) + if(nIsOp3 > 0) { return 3; } - switch (pAsm->D.dst.opcode) + switch (opcode) { case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLE: case SQ_OP2_INST_KILLGT: + case SQ_OP2_INST_KILLGE: + case SQ_OP2_INST_KILLNE: case SQ_OP2_INST_MUL: case SQ_OP2_INST_MAX: case SQ_OP2_INST_MIN: //case SQ_OP2_INST_MAX_DX10: //case SQ_OP2_INST_MIN_DX10: + case SQ_OP2_INST_SETE: + case SQ_OP2_INST_SETNE: case SQ_OP2_INST_SETGT: case SQ_OP2_INST_SETGE: case SQ_OP2_INST_PRED_SETE: @@ -358,6 +400,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_MOVA_FLOOR: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: + case SQ_OP2_INST_TRUNC: case SQ_OP2_INST_EXP_IEEE: case SQ_OP2_INST_LOG_CLAMPED: case SQ_OP2_INST_LOG_IEEE: @@ -369,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 1; default: radeon_error( - "Need instruction operand number for %x.\n", pAsm->D.dst.opcode); + "Need instruction operand number for %x.\n", opcode); }; return 3; @@ -383,98 +426,120 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->pR700Shader = pShader; pAsm->currentShaderType = spt; - pAsm->cf_last_export_ptr = NULL; + pAsm->cf_last_export_ptr = NULL; + + pAsm->cf_current_export_clause_ptr = NULL; + pAsm->cf_current_alu_clause_ptr = NULL; + pAsm->cf_current_tex_clause_ptr = NULL; + pAsm->cf_current_vtx_clause_ptr = NULL; + pAsm->cf_current_cf_clause_ptr = NULL; - pAsm->cf_current_export_clause_ptr = NULL; - pAsm->cf_current_alu_clause_ptr = NULL; - pAsm->cf_current_tex_clause_ptr = NULL; - pAsm->cf_current_vtx_clause_ptr = NULL; - pAsm->cf_current_cf_clause_ptr = NULL; + // No clause has been created yet + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; - // No clause has been created yet - pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + pAsm->number_of_colorandz_exports = 0; + pAsm->number_of_exports = 0; + pAsm->number_of_export_opcodes = 0; - pAsm->number_of_colorandz_exports = 0; - pAsm->number_of_exports = 0; - pAsm->number_of_export_opcodes = 0; + pAsm->alu_x_opcode = 0; + pAsm->D2.bits = 0; - pAsm->D.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; - pAsm->uLastPosUpdate = 0; + pAsm->uLastPosUpdate = 0; - *(BITS *) &pAsm->fp_stOutFmt0 = 0; + *(BITS *) &pAsm->fp_stOutFmt0 = 0; - pAsm->uIIns = 0; - pAsm->uOIns = 0; - pAsm->number_used_registers = 0; - pAsm->uUsedConsts = 256; + pAsm->uIIns = 0; + pAsm->uOIns = 0; + pAsm->number_used_registers = 0; + pAsm->uUsedConsts = 256; - // Fragment programs - pAsm->uBoolConsts = 0; - pAsm->uIntConsts = 0; - pAsm->uInsts = 0; - pAsm->uConsts = 0; + // Fragment programs + pAsm->uBoolConsts = 0; + pAsm->uIntConsts = 0; + pAsm->uInsts = 0; + pAsm->uConsts = 0; - pAsm->FCSP = 0; - pAsm->fc_stack[0].type = FC_NONE; + pAsm->FCSP = 0; + pAsm->fc_stack[0].type = FC_NONE; - pAsm->branch_depth = 0; - pAsm->max_branch_depth = 0; + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = (-1); - pAsm->aArgSubst[0] = - pAsm->aArgSubst[1] = - pAsm->aArgSubst[2] = - pAsm->aArgSubst[3] = (-1); + pAsm->uOutputs = 0; - pAsm->uOutputs = 0; + for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) + { + pAsm->color_export_register_number[i] = (-1); + } - for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) - { - pAsm->color_export_register_number[i] = (-1); - } + pAsm->depth_export_register_number = (-1); + pAsm->stencil_export_register_number = (-1); + pAsm->coverage_to_mask_export_register_number = (-1); + pAsm->mask_export_register_number = (-1); - pAsm->depth_export_register_number = (-1); - pAsm->stencil_export_register_number = (-1); - pAsm->coverage_to_mask_export_register_number = (-1); - pAsm->mask_export_register_number = (-1); + pAsm->starting_export_register_number = 0; + pAsm->starting_vfetch_register_number = 0; + pAsm->starting_temp_register_number = 0; + pAsm->uFirstHelpReg = 0; - pAsm->starting_export_register_number = 0; - pAsm->starting_vfetch_register_number = 0; - pAsm->starting_temp_register_number = 0; - pAsm->uFirstHelpReg = 0; + pAsm->input_position_is_used = GL_FALSE; + pAsm->input_normal_is_used = GL_FALSE; + for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) + { + pAsm->input_color_is_used[ i ] = GL_FALSE; + } - pAsm->input_position_is_used = GL_FALSE; - pAsm->input_normal_is_used = GL_FALSE; + for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) + { + pAsm->input_texture_unit_is_used[ i ] = GL_FALSE; + } + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } - for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) - { - pAsm->input_color_is_used[ i ] = GL_FALSE; - } + pAsm->number_of_inputs = 0; - for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) - { - pAsm->input_texture_unit_is_used[ i ] = GL_FALSE; - } + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; - for (i=0; i<VERT_ATTRIB_MAX; i++) - { - pAsm->vfetch_instruction_ptr_array[ i ] = NULL; - } + pAsm->subs = NULL; + pAsm->unSubArraySize = 0; + pAsm->unSubArrayPointer = 0; + pAsm->callers = NULL; + pAsm->unCallerArraySize = 0; + pAsm->unCallerArrayPointer = 0; + + pAsm->CALLSP = 0; + pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0; + pAsm->CALLSTACK[0].plstCFInstructions_local + = &(pAsm->pR700Shader->lstCFInstructions); + + pAsm->CALLSTACK[0].max = 0; + pAsm->CALLSTACK[0].current = 0; + + SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local); - pAsm->number_of_inputs = 0; + pAsm->unCFflags = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; + pAsm->presubs = NULL; + pAsm->unPresubArraySize = 0; + pAsm->unNumPresub = 0; + pAsm->unCurNumILInsts = 0; - return 0; + return 0; } GLboolean IsTex(gl_inst_opcode Opcode) @@ -592,6 +657,31 @@ int check_current_clause(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean add_cf_instruction(r700_AssemblerBase* pAsm) +{ + if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE)) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr = + (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_cf_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr ); + } + else + { + radeon_error("Could not allocate a new VFetch CF instruction.\n"); + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, R700VertexInstruction* vertex_instruction_ptr) { @@ -987,7 +1077,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -998,7 +1089,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -1031,7 +1123,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -1042,7 +1135,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -1053,7 +1147,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[1] = GL_FALSE; } - if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) ) @@ -1153,6 +1248,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: if (1 == pILInst->SrcReg[src].RelAddr) { setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0); @@ -1166,7 +1262,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; break; case PROGRAM_INPUT: - setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); pAsm->S[fld].src.rtype = SRC_REG_INPUT; switch (pAsm->currentShaderType) { @@ -1179,7 +1275,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, } break; default: - radeon_error("Invalid source argument type\n"); + radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File); return GL_FALSE; } } @@ -1235,6 +1331,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + if(pILInst->SaturateMode == SATURATE_ZERO_ONE) + { + pAsm->D2.dst2.SaturateMode = 1; + } + else + { + pAsm->D2.dst2.SaturateMode = 0; + } + return GL_TRUE; } @@ -1294,6 +1399,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) else { switch (pILInst->SrcReg[0].File) { + case PROGRAM_UNIFORM: case PROGRAM_CONSTANT: case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: @@ -1315,7 +1421,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) case FRAG_ATTRIB_TEX0: case FRAG_ATTRIB_TEX1: case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX3: case FRAG_ATTRIB_TEX4: case FRAG_ATTRIB_TEX5: case FRAG_ATTRIB_TEX6: @@ -1331,10 +1437,17 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) case FRAG_ATTRIB_PNTC: fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); break; - case FRAG_ATTRIB_VAR0: - fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n"); - break; } + + if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + } + break; } } @@ -1517,6 +1630,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, { src_sel = pSource->reg + CFILE_REGISTER_OFFSET; } + else if (pSource->rtype == SRC_REC_LITERAL) + { + src_sel = SQ_ALU_SRC_LITERAL; + } else { radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n", @@ -1606,7 +1723,8 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, return GL_FALSE; } - if ( pAsm->cf_current_alu_clause_ptr == NULL || + if ( pAsm->alu_x_opcode != 0 || + pAsm->cf_current_alu_clause_ptr == NULL || ( (pAsm->cf_current_alu_clause_ptr != NULL) && (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) ) ) ) @@ -1636,9 +1754,17 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0; pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0; - //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1; pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0; - pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + + if(pAsm->alu_x_opcode != 0) + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode; + pAsm->alu_x_opcode = 0; + } + else + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + } pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; @@ -1646,7 +1772,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, } else { - pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2); } // If this clause constains any instruction that is forward dependent on a TEX instruction, @@ -1923,7 +2049,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -2012,7 +2138,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -2045,7 +2171,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, if( is_gpr(sel) ) { if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) ) - { + { return GL_FALSE; } @@ -2057,7 +2183,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, else { if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) - { + { return GL_FALSE; } } @@ -2069,7 +2195,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, if( is_cfile(sel) ) { if( GL_FALSE == reserve_cfile(pAsm, sel, chan) ) - { + { return GL_FALSE; } } @@ -2081,6 +2207,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { + R700ALUInstruction * alu_instruction_ptr; + R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; + R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; + GLuint number_of_scalar_operations; GLboolean is_single_scalar_operation; GLuint scalar_channel_index; @@ -2089,7 +2219,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) int current_source_index; GLuint contiguous_slots_needed; - GLuint uNumSrc = r700GetNumOperands(pAsm); + GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); //GLuint channel_swizzle, j; //GLuint chan_counter[4] = {0, 0, 0, 0}; //PVSSRC * pSource[3]; @@ -2146,23 +2276,44 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) contiguous_slots_needed = 0; - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + if(!is_single_scalar_operation) { contiguous_slots_needed = 4; } + contiguous_slots_needed += pAsm->D2.dst2.literal_slots; + initialize(pAsm); for (scalar_channel_index=0; scalar_channel_index < number_of_scalar_operations; scalar_channel_index++) { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); + if(scalar_channel_index == (number_of_scalar_operations-1)) + { + switch(pAsm->D2.dst2.literal_slots) + { + case 0: + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + break; + case 1: + alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); + Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; + break; + case 2: + alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); + Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; + break; + }; + } + else + { + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + } //src 0 current_source_index = 0; @@ -2172,7 +2323,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } @@ -2186,13 +2337,13 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X; + alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2204,9 +2355,17 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; } - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; + if(1 == pAsm->D.dst.predicated) + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + } // dst if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || @@ -2215,7 +2374,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; } else - { + { radeon_error("Only temp destination registers supported for ALU dest regs.\n"); return GL_FALSE; } @@ -2245,7 +2404,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; + alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; if (pAsm->D.dst.op3) { @@ -2272,8 +2431,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; @@ -2301,8 +2460,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; @@ -2329,7 +2488,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { + { return GL_FALSE; } @@ -2340,19 +2499,19 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) if (is_single_scalar_operation) { if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { + { return GL_FALSE; } } else { if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return 1; + { + return GL_FALSE; } } - contiguous_slots_needed = 0; + contiguous_slots_needed -= 1; } return GL_TRUE; @@ -2403,11 +2562,14 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) //reset for next inst. pAsm->D.bits = 0; + pAsm->D2.bits = 0; pAsm->S[0].bits = 0; pAsm->S[1].bits = 0; pAsm->S[2].bits = 0; pAsm->is_tex = GL_FALSE; pAsm->need_tex_barrier = GL_FALSE; + pAsm->D2.bits = 0; + pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0; return GL_TRUE; } @@ -2640,9 +2802,44 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_COS(r700_AssemblerBase *pAsm) +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { - return assemble_math_function(pAsm, SQ_OP2_INST_COS); + int tmp; + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + assemble_src(pAsm, 0, -1); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; + next_ins(pAsm); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + + //TODO - replicate if more channels set in WriteMask + return GL_TRUE; + } GLboolean assemble_DOT(r700_AssemblerBase *pAsm) @@ -2910,13 +3107,15 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_KIL(r700_AssemblerBase *pAsm) -{ - /* TODO: doc says KILL has to be last(end) ALU clause */ - - checkop1(pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; + if(pILInst->Opcode == OPCODE_KIL) + checkop1(pAsm); + + pAsm->D.dst.opcode = opcode; + //pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -2929,21 +3128,34 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm) setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = 0; - setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); noneg_PVSSRC(&(pAsm->S[0].src)); - if ( GL_FALSE == assemble_src(pAsm, 0, 1) ) + if(pILInst->Opcode == OPCODE_KIL_NV) { - return GL_FALSE; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + neg_PVSSRC(&(pAsm->S[1].src)); } - + else + { + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + } + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } + /* Doc says KILL has to be last(end) ALU clause */ pAsm->pR700Shader->killIsUsed = GL_TRUE; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; return GL_TRUE; } @@ -3007,6 +3219,7 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm) { return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) { return GL_FALSE; @@ -3742,77 +3955,137 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); } -GLboolean assemble_SIN(r700_AssemblerBase *pAsm) -{ - return assemble_math_function(pAsm, SQ_OP2_INST_SIN); -} - GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { BITS tmp; - checkop1(pAsm); + checkop1(pAsm); - tmp = gethelpr(pAsm); + tmp = gethelpr(pAsm); + /* tmp.x = src /2*PI */ + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; - // COS tmp.x, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_COS; - pAsm->D.dst.math = 1; + assemble_src(pAsm, 0, -1); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + next_ins(pAsm); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + // COS dst.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; - // SIN tmp.y, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_SIN; - pAsm->D.dst.math = 1; + assemble_dst(pAsm); + /* mask y */ + pAsm->D.dst.writey = 0; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writey = 1; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + // SIN dst.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; - // MOV dst.mask, tmp - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + assemble_dst(pAsm); + /* mask x */ + pAsm->D.dst.writex = 0; - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = DST_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlez = SQ_SEL_0; - pAsm->S[0].src.swizzlew = SQ_SEL_0; + return GL_TRUE; +} - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } +GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = opcode; + //pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + pAsm->D.dst.predicated = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->uHelpReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number; + pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7; + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = pAsm->uHelpReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } @@ -3895,6 +4168,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) { + case PROGRAM_UNIFORM: case PROGRAM_CONSTANT: case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: @@ -3992,24 +4266,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently - * have to do explicit instruction - */ - pAsm->D.dst.opcode = SQ_OP2_INST_MAX; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp1; - pAsm->D.dst.writez = 1; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp1; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].bits = pAsm->S[0].bits; - flipneg_PVSSRC(&(pAsm->S[1].src)); - - next_ins(pAsm); - /* tmp1.z = RCP_e(|tmp1.z|) */ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; pAsm->D.dst.math = 1; @@ -4022,13 +4278,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = tmp1; pAsm->S[0].src.swizzlex = SQ_SEL_Z; + pAsm->S[0].src.abs = 1; next_ins(pAsm); /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp - * also no support for imm constants, so add 1 here */ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; pAsm->D.dst.op3 = 1; @@ -4045,30 +4301,12 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[1].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + /* immediate c 1.5 */ + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1.5F; + pAsm->S[2].src.rtype = SRC_REC_LITERAL; pAsm->S[2].src.reg = tmp1; - setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); - - next_ins(pAsm); - - /* ADD the remaining .5 */ - pAsm->D.dst.opcode = SQ_OP2_INST_ADD; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp2; - pAsm->D.dst.writex = 1; - pAsm->D.dst.writey = 1; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp2; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 - noswizzle_PVSSRC(&(pAsm->S[1].src)); + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); next_ins(pAsm); @@ -4104,11 +4342,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->is_tex = GL_TRUE; if ( GL_TRUE == need_barrier ) + + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) { pAsm->need_tex_barrier = GL_TRUE; } // Set src1 to tex unit id - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; //No sw info from mesa compiler, so hard code here. @@ -4265,27 +4506,909 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_IF(r700_AssemblerBase *pAsm) +static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason) +{ + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current--; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should -= 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current -= 1; + break; + }; +} + +static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly) +{ + if(GL_TRUE == bCheckMaxOnly) + { + switch (uReason) + { + case FC_PUSH_VPM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 1; + } + break; + case FC_PUSH_WQM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 4; + } + break; + } + return; + } + + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current++; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should += 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current += 1; + break; + }; + + if(pAsm->CALLSTACK[pAsm->CALLSP].current + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current; + } +} + +GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset; + + return GL_TRUE; +} + +GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) { + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + return GL_TRUE; +} + +GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) +{ + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + if(GL_TRUE != bHasElse) + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + } + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_IF; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].midLen= 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + +#ifndef USE_CF_FOR_POP_AFTER + if(GL_TRUE != bHasElse) + { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; + } +#endif /* USE_CF_FOR_POP_AFTER */ + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; /// + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid, + 0, + sizeof(R700ControlFlowGenericClause *) ); + pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr; + //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1; + +#ifndef USE_CF_FOR_POP_AFTER + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; +#endif /* USE_CF_FOR_POP_AFTER */ + + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1; + return GL_TRUE; } GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) { +#ifdef USE_CF_FOR_POP_AFTER + pops(pAsm, 1); +#endif /* USE_CF_FOR_POP_AFTER */ + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + if(NULL == pAsm->fc_stack[pAsm->FCSP].mid) + { + /* no else in between */ + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; + } + else + { + pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; + } + + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) + { + FREE(pAsm->fc_stack[pAsm->FCSP].mid); + } + + if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF) + { + radeon_error("if/endif in shader code are not paired. \n"); + return GL_FALSE; + } + + pAsm->FCSP--; + + decreaseCurrent(pAsm, FC_PUSH_VPM); + + return GL_TRUE; +} + +GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].unNumMid = 0; + pAsm->fc_stack[pAsm->FCSP].midLen = 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + + checkStackDepth(pAsm, FC_LOOP, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_BRK(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_CONTINUE_BREAK + + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) + { + radeon_error("Break is not inside loop/endloop pair.\n"); + return GL_FALSE; + } + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + +#endif //USE_CF_FOR_CONTINUE_BREAK + return GL_TRUE; +} + +GLboolean assemble_CONT(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_CONTINUE_BREAK + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) + { + radeon_error("Continue is not inside loop/endloop pair.\n"); + return GL_FALSE; + } + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + +#endif /* USE_CF_FOR_CONTINUE_BREAK */ + + return GL_TRUE; +} + +GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm) +{ + GLuint i; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1; + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + +#ifdef USE_CF_FOR_CONTINUE_BREAK + for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++) + { + pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex; + } + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) + { + FREE(pAsm->fc_stack[pAsm->FCSP].mid); + } +#endif + + if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP) + { + radeon_error("loop/endloop in shader code are not paired. \n"); + return GL_FALSE; + } + + GLuint unFCSP; + GLuint unIF = 0; + if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0) + { + for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + breakLoopOnFlag(pAsm, unFCSP); + break; + } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } + } + if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry) + { +#ifdef USE_CF_FOR_POP_AFTER + returnOnFlag(pAsm, unIF); +#else + returnOnFlag(pAsm, 0); +#endif /* USE_CF_FOR_POP_AFTER */ + pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET; + } + } + + pAsm->FCSP--; + + decreaseCurrent(pAsm, FC_LOOP); + + return GL_TRUE; +} + +void add_return_inst(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; +} + +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift) +{ + /* Put in sub */ + if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize ) + { + pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs, + sizeof(SUB_OFFSET) * pAsm->unSubArraySize, + sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) ); + if(NULL == pAsm->subs) + { + return GL_FALSE; + } + pAsm->unSubArraySize += 10; + } + + pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; + + pAsm->CALLSP++; + pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer; + pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP; + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local + = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local); + pAsm->CALLSTACK[pAsm->CALLSP].max = 0; + pAsm->CALLSTACK[pAsm->CALLSP].current = 0; + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->unSubArrayPointer++; + + /* start sub */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_REP; + + checkStackDepth(pAsm, FC_REP, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm) +{ + if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP) + { + radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n"); + return GL_FALSE; + } + + /* copy max to sub structure */ + pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax + = pAsm->CALLSTACK[pAsm->CALLSP].max; + + decreaseCurrent(pAsm, FC_REP); + + pAsm->CALLSP--; + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + pAsm->FCSP--; + + return GL_TRUE; +} + +GLboolean assemble_RET(r700_AssemblerBase *pAsm) +{ + GLuint unIF = 0; + + if(pAsm->CALLSP > 0) + { /* in sub */ + GLuint unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + setRetInLoopFlag(pAsm, SQ_SEL_1); + breakLoopOnFlag(pAsm, unFCSP); + pAsm->unCFflags |= LOOPRET_FLAGS; + + return GL_TRUE; + } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } + } + } + +#ifdef USE_CF_FOR_POP_AFTER + if(unIF > 0) + { + pops(pAsm, unIF); + } +#endif /* USE_CF_FOR_POP_AFTER */ + + add_return_inst(pAsm); + + return GL_TRUE; +} + +GLboolean assemble_CAL(r700_AssemblerBase *pAsm, + GLint nILindex, + GLuint uiIL_Shift, + GLuint uiNumberInsts, + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc) +{ + GLint uiIL_Offset; + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + /* Put in caller */ + if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize ) + { + pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers, + sizeof(CALLER_POINTER) * pAsm->unCallerArraySize, + sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) ); + if(NULL == pAsm->callers) + { + return GL_FALSE; + } + pAsm->unCallerArraySize += 10; + } + + uiIL_Offset = nILindex + uiIL_Shift; + pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; + pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + + pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL; + pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; + + pAsm->unCallerArrayPointer++; + + int j; + GLuint max; + GLuint unSubID; + GLboolean bRet; + for(j=0; j<pAsm->unSubArrayPointer; j++) + { + if(uiIL_Offset == pAsm->subs[j].subIL_Offset) + { /* compiled before */ + + max = pAsm->subs[j].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j; + return GL_TRUE; + } + } + + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; + unSubID = pAsm->unSubArrayPointer; + + bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm); + + if(GL_TRUE == bRet) + { + max = pAsm->subs[unSubID].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + + pAsm->subs[unSubID].pPresubDesc = pPresubDesc; + } + + return bRet; +} + +GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) +{ + GLfloat fLiteral[2] = {0.1, 0.0}; + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + pAsm->D.dst.op3 = 0; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->flag_reg_index; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal_slots = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 0; + /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ + pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */ + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ +#if 0 + pAsm->S[0].src.rtype = SRC_REC_LITERAL; + //pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = flagValue; + pAsm->S[0].src.swizzley = flagValue; + pAsm->S[0].src.swizzlez = flagValue; + pAsm->S[0].src.swizzlew = flagValue; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } +#endif + + return GL_TRUE; +} + +GLboolean testFlag(r700_AssemblerBase *pAsm) +{ + GLfloat fLiteral[2] = {0.1, 0.0}; + + //Test flag + GLuint tmp = gethelpr(pAsm); + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal_slots = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 1; + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ + + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->flag_reg_index; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; +#if 0 + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + //pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_Y; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[1].src.rtype = DST_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_1; + pAsm->S[1].src.swizzley = SQ_SEL_1; + pAsm->S[1].src.swizzlez = SQ_SEL_1; + pAsm->S[1].src.swizzlew = SQ_SEL_1; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } +#endif + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + + return GL_TRUE; +} + +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF) +{ + testFlag(pAsm); + jumpToOffest(pAsm, 1, 4); + setRetInLoopFlag(pAsm, SQ_SEL_0); + pops(pAsm, unIF + 1); + add_return_inst(pAsm); + return GL_TRUE; } -GLboolean AssembleInstr(GLuint uiNumberInsts, +GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) +{ + testFlag(pAsm); + + //break + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + pops(pAsm, 1); + + return GL_TRUE; +} + +GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, + GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode) { GLuint i; pR700AsmCode->pILInst = pILInst; - for(i=0; i<uiNumberInsts; i++) + for(i=uiFirstInst; i<uiNumberInsts; i++) { pR700AsmCode->uiCurInst = i; +#ifndef USE_CF_FOR_CONTINUE_BREAK + if(OPCODE_BRK == pILInst[i+1].Opcode) + { + switch(pILInst[i].Opcode) + { + case OPCODE_SLE: + pILInst[i].Opcode = OPCODE_SGT; + break; + case OPCODE_SLT: + pILInst[i].Opcode = OPCODE_SGE; + break; + case OPCODE_SGE: + pILInst[i].Opcode = OPCODE_SLT; + break; + case OPCODE_SGT: + pILInst[i].Opcode = OPCODE_SLE; + break; + case OPCODE_SEQ: + pILInst[i].Opcode = OPCODE_SNE; + break; + case OPCODE_SNE: + pILInst[i].Opcode = OPCODE_SEQ; + break; + default: + break; + } + } +#endif + if(pILInst[i].CondUpdate == 1) + { + /* remember dest register used for cond evaluation */ + /* XXX also handle PROGRAM_OUTPUT registers here? */ + pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; + } + switch (pILInst[i].Opcode) { case OPCODE_ABS: @@ -4313,7 +5436,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_COS: - if ( GL_FALSE == assemble_COS(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) return GL_FALSE; break; @@ -4342,7 +5465,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) return GL_FALSE; break; - //case OP_FLR_INT: + //case OP_FLR_INT: ; + // if ( GL_FALSE == assemble_FLR_INT() ) // return GL_FALSE; // break; @@ -4353,7 +5477,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_KIL: - if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) + case OPCODE_KIL_NV: + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) return GL_FALSE; break; case OPCODE_LG2: @@ -4393,6 +5518,26 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, case OPCODE_MUL: if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) return GL_FALSE; + break; + + case OPCODE_NOISE1: + { + callPreSub(pR700AsmCode, + GLSL_NOISE1, + &noise1_presub, + pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, + 1); + radeon_error("noise1: not yet supported shader instruction\n"); + }; + break; + case OPCODE_NOISE2: + radeon_error("noise2: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE3: + radeon_error("noise3: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE4: + radeon_error("noise4: not yet supported shader instruction\n"); break; case OPCODE_POW: @@ -4408,22 +5553,78 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_SIN: - if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) return GL_FALSE; break; case OPCODE_SCS: if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) return GL_FALSE; - break; + break; + + case OPCODE_SEQ: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) + { + return GL_FALSE; + } + break; + + case OPCODE_SGT: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + return GL_FALSE; + } + break; case OPCODE_SGE: if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + { return GL_FALSE; - break; + } + break; + + /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/ case OPCODE_SLT: - if ( GL_FALSE == assemble_SLT(pR700AsmCode) ) + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } + break; + + case OPCODE_SLE: + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } + break; + + case OPCODE_SNE: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) + { return GL_FALSE; - break; + } + break; //case OP_STP: // if ( GL_FALSE == assemble_STP(pR700AsmCode) ) @@ -4457,30 +5658,104 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; + case OPCODE_TRUNC: + if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) ) + return GL_FALSE; + break; + case OPCODE_XPD: if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) return GL_FALSE; break; case OPCODE_IF : - if ( GL_FALSE == assemble_IF(pR700AsmCode) ) - return GL_FALSE; + { + GLboolean bHasElse = GL_FALSE; + + if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE) + { + bHasElse = GL_TRUE; + } + + if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) ) + { + return GL_FALSE; + } + } break; + case OPCODE_ELSE : - radeon_error("Not yet implemented instruction OPCODE_ELSE \n"); - //if ( GL_FALSE == assemble_BAD("ELSE") ) + if ( GL_FALSE == assemble_ELSE(pR700AsmCode) ) return GL_FALSE; break; + case OPCODE_ENDIF: if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) return GL_FALSE; break; + case OPCODE_BGNLOOP: + if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BRK: + if( GL_FALSE == assemble_BRK(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CONT: + if( GL_FALSE == assemble_CONT(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_ENDLOOP: + if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BGNSUB: + if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) ) + { + return GL_FALSE; + } + break; + + case OPCODE_RET: + if( GL_FALSE == assemble_RET(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CAL: + if( GL_FALSE == assemble_CAL(pR700AsmCode, + pILInst[i].BranchTarget, + uiIL_Shift, + uiNumberInsts, + pILInst, + NULL) ) + { + return GL_FALSE; + } + break; + //case OPCODE_EXPORT: // if ( GL_FALSE == assemble_EXPORT() ) // return GL_FALSE; // break; + case OPCODE_ENDSUB: + return assemble_ENDSUB(pR700AsmCode); + case OPCODE_END: //pR700AsmCode->uiCurInst = i; //This is to remaind that if in later exoort there is depth/stencil @@ -4497,6 +5772,417 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_TRUE; } +GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) +{ + setRetInLoopFlag(pAsm, SQ_SEL_0); + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + return GL_TRUE; +} + +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) +{ + GLuint i; + GLuint unCFoffset; + TypedShaderList * plstCFmain; + TypedShaderList * plstCFsub; + + R700ShaderInstruction * pInst; + R700ControlFlowGenericClause * pCFInst; + + R700ControlFlowALUClause * pCF_ALU; + R700ALUInstruction * pALU; + GLuint unConstOffset = 0; + GLuint unRegOffset; + GLuint unMinRegIndex; + + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; + + /* remove flags init if they are not used */ + if((pAsm->unCFflags & HAS_LOOPRET) == 0) + { + R700ControlFlowALUClause * pCF_ALU; + pInst = plstCFmain->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + if(0 == pCF_ALU->m_Word1.f.count) + { + pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; + } + else + { + R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction; + + pALU->m_pLinkedALUClause = NULL; + pALU = (R700ALUInstruction *)(pALU->pNextInst); + pALU->m_pLinkedALUClause = pCF_ALU; + pCF_ALU->m_pLinkedALUInstruction = pALU; + + pCF_ALU->m_Word1.f.count--; + } + break; + } + pInst = pInst->pNextInst; + }; + } + + if(pAsm->CALLSTACK[0].max > 0) + { + pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; + } + + if(0 == pAsm->unSubArrayPointer) + { + return GL_TRUE; + } + + unCFoffset = plstCFmain->uNumOfNode; + + if(NULL != pILProg->Parameters) + { + unConstOffset = pILProg->Parameters->NumParameters; + } + + /* Reloc subs */ + for(i=0; i<pAsm->unSubArrayPointer; i++) + { + pAsm->subs[i].unCFoffset = unCFoffset; + plstCFsub = &(pAsm->subs[i].lstCFInstructions_local); + + pInst = plstCFsub->pHead; + + /* reloc instructions */ + while(pInst) + { + if(SIT_CF_GENERIC == pInst->m_ShaderInstType) + { + pCFInst = (R700ControlFlowGenericClause *)pInst; + + switch (pCFInst->m_Word1.f.cf_inst) + { + case SQ_CF_INST_POP: + case SQ_CF_INST_JUMP: + case SQ_CF_INST_ELSE: + case SQ_CF_INST_LOOP_END: + case SQ_CF_INST_LOOP_START: + case SQ_CF_INST_LOOP_START_NO_AL: + case SQ_CF_INST_LOOP_CONTINUE: + case SQ_CF_INST_LOOP_BREAK: + pCFInst->m_Word0.f.addr += unCFoffset; + break; + default: + break; + } + } + + pInst->m_uIndex += unCFoffset; + + pInst = pInst->pNextInst; + }; + + if(NULL != pAsm->subs[i].pPresubDesc) + { + GLuint uNumSrc; + + unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg; + unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart; + + pInst = plstCFsub->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + + if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src0_sel += unConstOffset; + } + + if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) + >= SQ_OP3_INST_MUL_LIT ) + { /* op3 : 3 srcs */ + if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word1_OP3.f.src2_sel += unConstOffset; + } + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + else + { + if(pAsm->bR6xx) + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + } + else + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + } + if(2 == uNumSrc) + { /* 2 srcs */ + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + } + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + pInst = pInst->pNextInst; + }; + } + + /* Put sub into main */ + plstCFmain->pTail->pNextInst = plstCFsub->pHead; + plstCFmain->pTail = plstCFsub->pTail; + plstCFmain->uNumOfNode += plstCFsub->uNumOfNode; + + unCFoffset += plstCFsub->uNumOfNode; + } + + /* reloc callers */ + for(i=0; i<pAsm->unCallerArrayPointer; i++) + { + pAsm->callers[i].cf_ptr->m_Word0.f.addr + = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; + + if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc) + { + unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg; + + if(NULL != pAsm->callers[i].prelude_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + if(NULL != pAsm->callers[i].finale_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + } + } + + return GL_TRUE; +} + +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc) +{ + /* save assemble context */ + GLuint starting_temp_register_number_save; + GLuint number_used_registers_save; + GLuint uFirstHelpReg_save; + GLuint uHelpReg_save; + GLuint uiCurInst_save; + struct prog_instruction *pILInst_save; + PRESUB_DESC * pPresubDesc; + GLboolean bRet; + int i; + + R700ControlFlowGenericClause* prelude_cf_ptr = NULL; + + /* copy srcs to presub inputs */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + for(i=0; i<uNumValidSrc; i++) + { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i]; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, i, 0) ) + { + return GL_FALSE; + } + + next_ins(pAsm); + } + if(uNumValidSrc > 0) + { + prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + /* browse thro existing presubs. */ + for(i=0; i<pAsm->unNumPresub; i++) + { + if(pAsm->presubs[i].sptSigniture == scriptSigniture) + { + break; + } + } + + if(i == pAsm->unNumPresub) + { /* not loaded yet */ + /* save assemble context */ + number_used_registers_save = pAsm->number_used_registers; + uFirstHelpReg_save = pAsm->uFirstHelpReg; + uHelpReg_save = pAsm->uHelpReg; + starting_temp_register_number_save = pAsm->starting_temp_register_number; + pILInst_save = pAsm->pILInst; + uiCurInst_save = pAsm->uiCurInst; + + /* alloc in presub */ + if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize ) + { + pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs, + sizeof(PRESUB_DESC) * pAsm->unPresubArraySize, + sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) ); + if(NULL == pAsm->presubs) + { + radeon_error("No memeory to allocate built in shader function description structures. \n"); + return GL_FALSE; + } + pAsm->unPresubArraySize += 4; + } + + pPresubDesc = &(pAsm->presubs[i]); + pPresubDesc->sptSigniture = scriptSigniture; + + /* constants offsets need to be final resolved at reloc. */ + if(0 == pAsm->unNumPresub) + { + pPresubDesc->unConstantsStart = 0; + } + else + { + pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart + + pAsm->presubs[i-1].pCompiledSub->NumParameters; + } + + pPresubDesc->pCompiledSub = pCompiledSub; + + pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts; + pPresubDesc->maxStartReg = uFirstHelpReg_save; + pAsm->unCurNumILInsts += pCompiledSub->NumInstructions; + + pAsm->unNumPresub++; + + /* setup new assemble context */ + pAsm->starting_temp_register_number = 0; + pAsm->number_used_registers = pCompiledSub->NumTemporaries; + pAsm->uFirstHelpReg = pAsm->number_used_registers; + pAsm->uHelpReg = pAsm->uFirstHelpReg; + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + + + pPresubDesc->number_used_registers = pAsm->number_used_registers; + + /* restore assemble context */ + pAsm->number_used_registers = number_used_registers_save; + pAsm->uFirstHelpReg = uFirstHelpReg_save; + pAsm->uHelpReg = uHelpReg_save; + pAsm->starting_temp_register_number = starting_temp_register_number_save; + pAsm->pILInst = pILInst_save; + pAsm->uiCurInst = uiCurInst_save; + } + else + { /* was loaded */ + pPresubDesc = &(pAsm->presubs[i]); + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + } + + if(GL_FALSE == bRet) + { + radeon_error("Shader presub assemble failed. \n"); + } + else + { + /* copy presub output to real dst */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pCompiledSub->dstRegIndex; + pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX; + pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY; + pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ; + pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW; + + next_ins(pAsm); + + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers ) + { + pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg; + } + if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg) + { + pPresubDesc->maxStartReg = pAsm->uFirstHelpReg; + } + + return bRet; +} + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -4791,6 +6477,25 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, export_starting_index++; } } + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[i], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + } // At least one param should be exported if (export_count) @@ -4825,6 +6530,21 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) { FREE(pR700AsmCode->pucOutMask); FREE(pR700AsmCode->pInstDeps); + + if(NULL != pR700AsmCode->subs) + { + FREE(pR700AsmCode->subs); + } + if(NULL != pR700AsmCode->callers) + { + FREE(pR700AsmCode->callers); + } + + if(NULL != pR700AsmCode->presubs) + { + FREE(pR700AsmCode->presubs); + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index c66db502a1..6ef945dfda 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -34,6 +34,45 @@ #include "r700_shaderinst.h" #include "r700_shader.h" +typedef enum LOADABLE_SCRIPT_SIGNITURE +{ + GLSL_NOISE1 = 0x10000001, + GLSL_NOISE2 = 0x10000002, + GLSL_NOISE3 = 0x10000003, + GLSL_NOISE4 = 0x10000004 +}LOADABLE_SCRIPT_SIGNITURE; + +typedef struct COMPILED_SUB +{ + struct prog_instruction *Instructions; + GLuint NumInstructions; + GLuint NumTemporaries; + GLuint NumParameters; + GLuint MinRegIndex; + GLfloat (*ParameterValues)[4]; + GLbyte outputSwizzleX; + GLbyte outputSwizzleY; + GLbyte outputSwizzleZ; + GLbyte outputSwizzleW; + GLshort srcRegIndex[3]; + GLushort dstRegIndex; +}COMPILED_SUB; + +typedef struct PRESUB_DESCtag +{ + LOADABLE_SCRIPT_SIGNITURE sptSigniture; + GLint subIL_Shift; + struct prog_src_register InReg[3]; + struct prog_dst_register OutReg; + + GLushort maxStartReg; + GLushort number_used_registers; + + GLuint unConstantsStart; + + COMPILED_SUB * pCompiledSub; +} PRESUB_DESC; + typedef enum SHADER_PIPE_TYPE { SPT_VP = 0, @@ -72,7 +111,8 @@ typedef enum SrcRegisterType SRC_REG_INPUT = 1, SRC_REG_CONSTANT = 2, SRC_REG_ALT_TEMPORARY = 3, - NUMBER_OF_SRC_REG_TYPE = 4 + SRC_REC_LITERAL = 4, + NUMBER_OF_SRC_REG_TYPE = 5 } SrcRegisterType; typedef enum DstRegisterType @@ -111,16 +151,24 @@ typedef struct PVSDSTtag BITS addrmode1:1; //32 } PVSDST; +typedef struct PVSINSTtag +{ + BITS literal_slots :2; + BITS SaturateMode :2; + BITS index_mode :3; +} PVSINST; + typedef struct PVSSRCtag { - BITS rtype:4; + BITS rtype:3; BITS addrmode0:1; - BITS reg:10; //15 (8) + BITS reg:10; //14 (8) BITS swizzlex:3; BITS swizzley:3; BITS swizzlez:3; - BITS swizzlew:3; //27 + BITS swizzlew:3; //26 + BITS abs:1; BITS negx:1; BITS negy:1; BITS negz:1; @@ -148,6 +196,7 @@ typedef union PVSDWORDtag { BITS bits; PVSDST dst; + PVSINST dst2; PVSSRC src; PVSMATH math; float f; @@ -251,6 +300,8 @@ enum FC_IF = 1, FC_LOOP = 2, FC_REP = 3, + FC_PUSH_VPM = 4, + FC_PUSH_WQM = 5, COND_NONE = 0, COND_BOOL = 1, @@ -263,22 +314,56 @@ enum typedef struct FC_LEVEL { - unsigned int first; ///< first fc instruction on level (if, rep, loop) - unsigned int* mid; ///< middle instructions - else or all breaks on this level - unsigned int midLen; - unsigned int type; - unsigned int cond; - unsigned int inv; - unsigned int bpush; ///< 1 if first instruction does branch stack push - int id; ///< id of bool or int variable + R700ControlFlowGenericClause * first; + R700ControlFlowGenericClause ** mid; + unsigned int unNumMid; + unsigned int midLen; + unsigned int type; + unsigned int cond; + unsigned int inv; + int id; ///< id of bool or int variable } FC_LEVEL; typedef struct VTX_FETCH_METHOD { - GLboolean bEnableMini; - GLuint mega_fetch_remainder; + GLboolean bEnableMini; + GLuint mega_fetch_remainder; } VTX_FETCH_METHOD; +typedef struct SUB_OFFSET +{ + GLint subIL_Offset; + GLuint unCFoffset; + GLuint unStackDepthMax; + PRESUB_DESC * pPresubDesc; + TypedShaderList lstCFInstructions_local; +} SUB_OFFSET; + +typedef struct CALLER_POINTER +{ + GLint subIL_Offset; + GLint subDescIndex; + R700ControlFlowGenericClause* cf_ptr; + + R700ControlFlowGenericClause* prelude_cf_ptr; + R700ControlFlowGenericClause* finale_cf_ptr; +} CALLER_POINTER; + +#define SQ_MAX_CALL_DEPTH 0x00000020 + +typedef struct CALL_LEVEL +{ + unsigned int FCSP_BeforeEntry; + GLint subDescIndex; + GLushort current; + GLushort max; + TypedShaderList * plstCFInstructions_local; +} CALL_LEVEL; + +#define HAS_CURRENT_LOOPRET 0x1L +#define HAS_LOOPRET 0x2L +#define LOOPRET_FLAGS HAS_LOOPRET | HAS_CURRENT_LOOPRET + typedef struct r700_AssemblerBase { R700ControlFlowSXClause* cf_last_export_ptr; @@ -294,14 +379,19 @@ typedef struct r700_AssemblerBase // No clause has been created yet CF_CLAUSE_TYPE cf_current_clause_type; + BITS alu_x_opcode; + GLuint number_of_exports; GLuint number_of_colorandz_exports; GLuint number_of_export_opcodes; PVSDWORD D; + PVSDWORD D2; PVSDWORD S[3]; + PVSDWORD C[4]; unsigned int uLastPosUpdate; + unsigned int last_cond_register; OUT_FRAGMENT_FMT_0 fp_stOutFmt0; @@ -310,6 +400,8 @@ typedef struct r700_AssemblerBase unsigned int number_used_registers; unsigned int uUsedConsts; + unsigned int flag_reg_index; + // Fragment programs unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX]; unsigned int uiFP_OutputMap[FRAG_RESULT_MAX]; @@ -330,9 +422,6 @@ typedef struct r700_AssemblerBase unsigned int FCSP; FC_LEVEL fc_stack[32]; - unsigned int branch_depth; - unsigned int max_branch_depth; - //----------------------------------------------------------------------------------- // ArgSubst used in Assemble_Source() function //----------------------------------------------------------------------------------- @@ -373,11 +462,29 @@ typedef struct r700_AssemblerBase SHADER_PIPE_TYPE currentShaderType; struct prog_instruction * pILInst; GLuint uiCurInst; + GLubyte SamplerUnits[MAX_SAMPLERS]; GLboolean bR6xx; /* helper to decide which type of instruction to assemble */ GLboolean is_tex; /* we inserted helper intructions and need barrier on next TEX ins */ GLboolean need_tex_barrier; + + SUB_OFFSET * subs; + GLuint unSubArraySize; + GLuint unSubArrayPointer; + CALLER_POINTER * callers; + GLuint unCallerArraySize; + GLuint unCallerArrayPointer; + unsigned int CALLSP; + CALL_LEVEL CALLSTACK[SQ_MAX_CALL_DEPTH]; + + GLuint unCFflags; + + PRESUB_DESC * presubs; + GLuint unPresubArraySize; + GLuint unNumPresub; + GLuint unCurNumILInsts; + } r700_AssemblerBase; //Internal use @@ -399,7 +506,7 @@ BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ; GLboolean is_reduction_opcode(PVSDWORD * dest); GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size); -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm); +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3); GLboolean IsTex(gl_inst_opcode Opcode); GLboolean IsAlu(gl_inst_opcode Opcode); @@ -446,6 +553,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr, GLuint contiguous_slots_needed); + +GLboolean add_cf_instruction(r700_AssemblerBase* pAsm); +void add_return_inst(r700_AssemblerBase *pAsm); + void get_src_properties(R700ALUInstruction* alu_instruction_ptr, int source_index, BITS* psrc_sel, @@ -467,13 +578,20 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr); GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); GLboolean next_ins(r700_AssemblerBase *pAsm); + +GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops); +GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset); +GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue); +GLboolean testFlag(r700_AssemblerBase *pAsm); +GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP); +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF); + GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); GLboolean assemble_ABS(r700_AssemblerBase *pAsm); GLboolean assemble_ADD(r700_AssemblerBase *pAsm); GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); -GLboolean assemble_COS(r700_AssemblerBase *pAsm); GLboolean assemble_DOT(r700_AssemblerBase *pAsm); GLboolean assemble_DST(r700_AssemblerBase *pAsm); GLboolean assemble_EX2(r700_AssemblerBase *pAsm); @@ -481,7 +599,7 @@ GLboolean assemble_EXP(r700_AssemblerBase *pAsm); GLboolean assemble_FLR(r700_AssemblerBase *pAsm); GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm); GLboolean assemble_FRC(r700_AssemblerBase *pAsm); -GLboolean assemble_KIL(r700_AssemblerBase *pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode); GLboolean assemble_LG2(r700_AssemblerBase *pAsm); GLboolean assemble_LRP(r700_AssemblerBase *pAsm); GLboolean assemble_LOG(r700_AssemblerBase *pAsm); @@ -494,17 +612,37 @@ GLboolean assemble_MUL(r700_AssemblerBase *pAsm); GLboolean assemble_POW(r700_AssemblerBase *pAsm); GLboolean assemble_RCP(r700_AssemblerBase *pAsm); GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); -GLboolean assemble_SIN(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); + +GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode); + GLboolean assemble_SLT(r700_AssemblerBase *pAsm); GLboolean assemble_STP(r700_AssemblerBase *pAsm); GLboolean assemble_TEX(r700_AssemblerBase *pAsm); GLboolean assemble_XPD(r700_AssemblerBase *pAsm); GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm); -GLboolean assemble_IF(r700_AssemblerBase *pAsm); +GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse); +GLboolean assemble_ELSE(r700_AssemblerBase *pAsm); GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm); +GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm); +GLboolean assemble_BRK(r700_AssemblerBase *pAsm); +GLboolean assemble_COND(r700_AssemblerBase *pAsm); +GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm); + +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift); +GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm); +GLboolean assemble_RET(r700_AssemblerBase *pAsm); +GLboolean assemble_CAL(r700_AssemblerBase *pAsm, + GLint nILindex, + GLuint uiIL_Offest, + GLuint uiNumberInsts, + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc); + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -514,14 +652,25 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select); +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + /* struct prog_instruction ** pILInstParent, */ + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc); //Interface -GLboolean AssembleInstr(GLuint uiNumberInsts, +GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, + GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode); GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg); +GLboolean InitShaderProgram(r700_AssemblerBase * pAsm); + int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 02c56b98d9..ee2a0a4c8a 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -446,68 +446,77 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_bo * pbo; - BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + struct radeon_bo * pbo; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); + pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); - if (!pbo) - return; + if (!pbo) + return; - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3 + 2); - R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); - R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); - R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, - pbo, - r700->ps.SQ_PGM_START_PS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); + R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); + R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, + pbo, + r700->ps.SQ_PGM_START_PS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(9); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(9); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All); + END_BATCH(); - COMMIT_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF); + END_BATCH(); + + COMMIT_BATCH(); } static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_bo * pbo; - BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + struct radeon_bo * pbo; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); + pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); - if (!pbo) - return; + if (!pbo) + return; - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3 + 2); - R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); - R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); - R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, - pbo, - r700->vs.SQ_PGM_START_VS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); + R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); + R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, + pbo, + r700->vs.SQ_PGM_START_VS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(6); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All); + END_BATCH(); - COMMIT_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + 32*4), 0x0100000F); + //R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F); + END_BATCH(); + + COMMIT_BATCH(); } static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom) @@ -1309,8 +1318,8 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); ALLOC_STATE(vpt, always, 16, r700SendViewportState); ALLOC_STATE(fs, always, 18, r700SendFSState); - ALLOC_STATE(vs, always, 18, r700SendVSState); - ALLOC_STATE(ps, always, 21, r700SendPSState); + ALLOC_STATE(vs, always, 21, r700SendVSState); + ALLOC_STATE(ps, always, 24, r700SendPSState); ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts); ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts); ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState); diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index ccafd433bf..d15f013710 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -44,12 +44,18 @@ //TODO : Validate FP input with VP output. void Map_Fragment_Program(r700_AssemblerBase *pAsm, - struct gl_fragment_program *mesa_fp) + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) { unsigned int unBit; unsigned int i; GLuint ui; + /* match fp inputs with vp exports. */ + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + pAsm->number_used_registers = 0; //Input mapping : mesa_fp->Base.InputsRead set the flag, set in @@ -61,32 +67,93 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_COL0; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_COL1; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_FOGC; - if(mesa_fp->Base.InputsRead & unBit) - { - pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; - } + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; + } for(i=0; i<8; i++) { - unBit = 1 << (FRAG_ATTRIB_TEX0 + i); - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++; } } + +/* order has been taken care of */ +#if 1 + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++; + } + } +#else + if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 ) + { + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying; + struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying; + struct gl_program_parameter * pVsParam; + struct gl_program_parameter * pPsParam; + GLuint j, k; + GLuint unMaxVarying = 0; + + for(i=0; i<VsVarying->NumParameters; i++) + { + pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0; + } + + for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(mesa_fp->Base.InputsRead & unBit) + { + j = i - FRAG_ATTRIB_VAR0; + pPsParam = PsVarying->Parameters + j; + + for(k=0; k<VsVarying->NumParameters; k++) + { + pVsParam = VsVarying->Parameters + k; + + if( strcmp(pPsParam->Name, pVsParam->Name) == 0) + { + pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k; + if(k > unMaxVarying) + { + unMaxVarying = k; + } + break; + } + } + } + } + + pAsm->number_used_registers += unMaxVarying + 1; + } +#endif + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++; + } /* Map temporary registers (GPRs) */ pAsm->starting_temp_register_number = pAsm->number_used_registers; @@ -127,6 +194,8 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->pucOutMask[ui] = 0x0; } + pAsm->flag_reg_index = pAsm->number_used_registers++; + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -233,22 +302,35 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, } GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_fp) + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) { GLuint number_of_colors_exported; GLboolean z_enabled = GL_FALSE; GLuint unBit; + int i; //Init_Program Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); - Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp); + Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) ) { return GL_FALSE; } + + InitShaderProgram(&(fp->r700AsmCode)); - if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions, + for(i=0; i < MAX_SAMPLERS; i++) + { + fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i]; + } + + fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions; + + if( GL_FALSE == AssembleInstr(0, + 0, + mesa_fp->Base.NumInstructions, &(mesa_fp->Base.Instructions[0]), &(fp->r700AsmCode)) ) { @@ -260,6 +342,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, return GL_FALSE; } + if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) ) + { + return GL_FALSE; + } + fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0 : (fp->r700AsmCode.number_used_registers - 1); @@ -300,7 +387,7 @@ void r700SelectFragmentShader(GLcontext *ctx) } if (GL_FALSE == fp->translated) - r700TranslateFragmentShader(fp, &(fp->mesa_program)); + r700TranslateFragmentShader(fp, &(fp->mesa_program), ctx); } void * r700GetActiveFpShaderBo(GLcontext * ctx) @@ -378,6 +465,20 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE)) + { + ui += 1; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit); + SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask); + } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + } + + ui = (unNumOfReg < ui) ? ui : unNumOfReg; SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); @@ -394,6 +495,9 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) EXPORT_MODE_shift, EXPORT_MODE_mask); // emit ps input map + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; unBit = 1 << FRAG_ATTRIB_WPOS; if(mesa_fp->Base.InputsRead & unBit) { @@ -407,8 +511,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_COL0; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -420,8 +524,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_COL1; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -433,8 +537,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_FOGC; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -448,8 +552,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) for(i=0; i<8; i++) { - unBit = 1 << (FRAG_ATTRIB_TEX0 + i); - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -459,6 +563,35 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } } + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + } + exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1)) { @@ -469,7 +602,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) /* sent out shader constants. */ paramList = fp->mesa_program.Base.Parameters; - if(NULL != paramList) { + if(NULL != paramList) + { _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) @@ -482,14 +616,33 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; } } else r700->ps.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->ps.num_consts; + for(ui=0; ui<pAsm->unNumPresub; ui++) + { + pCompiledSub = pAsm->presubs[ui].pCompiledSub; + + r700->ps.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h index cbb108d212..e562bfa478 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.h +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -49,12 +49,14 @@ struct r700_fragment_program /* Internal */ void Map_Fragment_Program(r700_AssemblerBase *pAsm, - struct gl_fragment_program *mesa_fp); + struct gl_fragment_program *mesa_fp, + GLcontext *ctx); GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, struct gl_fragment_program *mesa_fp); GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_vp); + struct gl_fragment_program *mesa_vp, + GLcontext *ctx); /* Interface */ extern void r700SelectFragmentShader(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 47f89c91f8..eab27cbd84 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -526,6 +526,9 @@ static void r700ConvertAttrib(GLcontext *ctx, int count, radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + + radeon_bo_map(attr->bo, 1); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); assert(src_ptr != NULL); @@ -559,6 +562,8 @@ static void r700ConvertAttrib(GLcontext *ctx, int count, break; } + radeon_bo_unmap(attr->bo); + if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); @@ -577,6 +582,8 @@ static void r700AlignDataToDword(GLcontext *ctx, radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + radeon_bo_map(attr->bo, 1); + if (!input->BufferObj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); @@ -596,6 +603,7 @@ static void r700AlignDataToDword(GLcontext *ctx, } } + radeon_bo_unmap(attr->bo); if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); @@ -664,14 +672,18 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, &context->stream_desc[index].bo_offset, size, 32); + + radeon_bo_map(context->stream_desc[index].bo, 1); assert(context->stream_desc[index].bo->ptr != NULL); + + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, context->stream_desc[index].bo_offset); switch (context->stream_desc[index].dwords) { case 1: - radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); @@ -686,6 +698,7 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input assert(0); break; } + radeon_bo_unmap(context->stream_desc[index].bo); } } @@ -757,6 +770,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); @@ -770,6 +784,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *out++ = in[i]; } + radeon_bo_unmap(context->ind_buf.bo); #if MESA_BIG_ENDIAN } else @@ -780,6 +795,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); @@ -792,6 +808,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer { *out++ = in[i]; } + radeon_bo_unmap(context->ind_buf.bo); #endif } @@ -837,11 +854,13 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); _mesa_memcpy(dst_ptr, src_ptr, size); + radeon_bo_unmap(context->ind_buf.bo); context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); context->ind_buf.count = mesa_ind_buf->count; diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index 955ea4e4e1..2eed1acc2f 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -159,13 +159,18 @@ void Init_R700_Shader(R700_Shader * pShader) pShader->lstVTXInstructions.uNumOfNode=0; } +void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF) +{ + pShader->plstCFInstructions_active = plstCF; +} + void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst) { R700ControlFlowSXClause* pSXClause; R700ControlFlowSMXClause* pSMXClause; - pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode; - AddInstToList(&(pShader->lstCFInstructions), + pCFInst->m_uIndex = pShader->plstCFInstructions_active->uNumOfNode; + AddInstToList(pShader->plstCFInstructions_active, (R700ShaderInstruction*)pCFInst); pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType); diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h index c6a058617e..0599ffd901 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.h +++ b/src/mesa/drivers/dri/r600/r700_shader.h @@ -109,6 +109,7 @@ typedef struct R700_Shader GLuint uStackSize; GLuint uMaxCallDepth; + TypedShaderList * plstCFInstructions_active; TypedShaderList lstCFInstructions; TypedShaderList lstALUInstructions; TypedShaderList lstTEXInstructions; @@ -132,13 +133,13 @@ void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruc void ResolveLinks(R700_Shader *pShader); void Assemble(R700_Shader *pShader); - //Interface void Init_R700_Shader(R700_Shader * pShader); void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst); void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst); void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst); void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst); +void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF); void LoadProgram(R700_Shader *pShader); void UpdateShaderRegisters(R700_Shader *pShader); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index ffc6068bd8..90fac078ff 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -111,6 +111,15 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, } } + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[i] = unTotal++; + } + } + return (unTotal - unStart); } @@ -235,6 +244,8 @@ void Map_Vertex_Program(GLcontext *ctx, pAsm->number_used_registers += mesa_vp->Base.NumTemporaries; } + pAsm->flag_reg_index = pAsm->number_used_registers++; + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -324,7 +335,18 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return NULL; } - if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions, + InitShaderProgram(&(vp->r700AsmCode)); + + for(i=0; i < MAX_SAMPLERS; i++) + { + vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i]; + } + + vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions; + + if(GL_FALSE == AssembleInstr(0, + 0, + vp->mesa_program->Base.NumInstructions, &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) { @@ -336,6 +358,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return NULL; } + if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) ) + { + return GL_FALSE; + } + vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 : (vp->r700AsmCode.number_used_registers - 1); @@ -612,6 +639,12 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) paramList = vp->mesa_program->Base.Parameters; if(NULL != paramList) { + /* vp->mesa_program was cloned, not updated by glsl shader api. */ + /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current */ + /* so, use ctx->VertexProgem._Current */ + struct gl_program_parameter_list *paramListOrginal = + paramListOrginal = ctx->VertexProgram._Current->Base.Parameters; + _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) @@ -624,13 +657,42 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) + { + r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + } + else + { + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + } } } else r700->vs.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->vs.num_consts; + for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++) + { + pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub; + + r700->vs.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } |