diff options
Diffstat (limited to 'src/mesa/drivers/dri/r600')
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_cmdbuf.c | 15 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_cmdbuf.h | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_context.c | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_texstate.c | 13 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.c | 1780 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.h | 90 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_chip.c | 42 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_fragprog.c | 162 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_fragprog.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_shaderinst.h | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_vertprog.c | 29 |
11 files changed, 1037 insertions, 1135 deletions
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index d27a3245a3..5e1504872d 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -52,6 +52,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_mipmap_tree.h" #include "radeon_reg.h" +struct r600_cs_manager_legacy +{ + struct radeon_cs_manager base; + struct radeon_context *ctx; + /* hack for scratch stuff */ + uint32_t pending_age; + uint32_t pending_count; +}; + +struct r600_cs_reloc_legacy { + struct radeon_cs_reloc base; + uint32_t cindices; + uint32_t *indices; + uint32_t *reloc_indices; +}; static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm, diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h index eba43d37b6..dff0009699 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -118,22 +118,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R600_IT_SET_CTL_CONST 0x00006F00 #define R600_IT_SURFACE_BASE_UPDATE 0x00007300 -struct r600_cs_manager_legacy -{ - struct radeon_cs_manager base; - struct radeon_context *ctx; - /* hack for scratch stuff */ - uint32_t pending_age; - uint32_t pending_count; -}; - -struct r600_cs_reloc_legacy { - struct radeon_cs_reloc base; - uint32_t cindices; - uint32_t *indices; - uint32_t *reloc_indices; -}; - struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx); /** diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 25314eff56..b66fe78ac3 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -317,20 +317,8 @@ static void r600InitGLExtensions(GLcontext *ctx) #ifdef R600_ENABLE_GLSL_TEST driInitExtensions(ctx, gl_20_extension, GL_TRUE); - //_mesa_enable_2_0_extensions(ctx); - //1.5 - ctx->Extensions.ARB_occlusion_query = GL_TRUE; - ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE; - ctx->Extensions.EXT_shadow_funcs = GL_TRUE; - //2.0 - ctx->Extensions.ARB_draw_buffers = GL_TRUE; - ctx->Extensions.ARB_point_sprite = GL_TRUE; - ctx->Extensions.ARB_shader_objects = GL_TRUE; - ctx->Extensions.ARB_vertex_shader = GL_TRUE; - ctx->Extensions.ARB_fragment_shader = GL_TRUE; - ctx->Extensions.EXT_blend_equation_separate = GL_TRUE; - ctx->Extensions.ATI_separate_stencil = GL_TRUE; - + _mesa_enable_2_0_extensions(ctx); + /* glsl compiler has problem if this is not GL_TRUE */ ctx->Shader.EmitCondCodes = GL_TRUE; #endif /* R600_ENABLE_GLSL_TEST */ diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 4ec315b78c..2a4a6e6ee1 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -917,18 +917,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 309c90fdd0..e10b23b97f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -32,6 +32,7 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "shader/prog_parameter.h" #include "radeon_debug.h" #include "r600_context.h" @@ -41,6 +42,39 @@ #define USE_CF_FOR_CONTINUE_BREAK 1 #define USE_CF_FOR_POP_AFTER 1 +struct prog_instruction noise1_insts[12] = { + {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0} +}; +float noise1_const[2][4] = { + {0.300000f, 0.900000f, 0.500000f, 0.300000f} +}; + +COMPILED_SUB noise1_presub = { + &(noise1_insts[0]), + 12, + 2, + 1, + 0, + &(noise1_const[0]), + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + {0,0,0}, + 0 +}; + BITS addrmode_PVSDST(PVSDST * pPVSDST) { return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); @@ -330,14 +364,14 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) return(format); } -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) { - if(pAsm->D.dst.op3) + if(nIsOp3 > 0) { return 3; } - switch (pAsm->D.dst.opcode) + switch (opcode) { case SQ_OP2_INST_ADD: case SQ_OP2_INST_KILLE: @@ -378,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 1; default: radeon_error( - "Need instruction operand number for %x.\n", pAsm->D.dst.opcode); + "Need instruction operand number for %x.\n", opcode); }; return 3; @@ -500,12 +534,20 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->unCFflags = 0; + pAsm->presubs = NULL; + pAsm->unPresubArraySize = 0; + pAsm->unNumPresub = 0; + pAsm->unCurNumILInsts = 0; + + pAsm->unVetTexBits = 0; + return 0; } GLboolean IsTex(gl_inst_opcode Opcode) { - if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ) + if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) || + (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) ) { return GL_TRUE; } @@ -1292,6 +1334,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + if(pILInst->SaturateMode == SATURATE_ZERO_ONE) + { + pAsm->D2.dst2.SaturateMode = 1; + } + else + { + pAsm->D2.dst2.SaturateMode = 0; + } + return GL_TRUE; } @@ -1364,43 +1415,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; break; case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) + if(SPT_VP == pAsm->currentShaderType) { - case FRAG_ATTRIB_WPOS: - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_FOGC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; + switch (pILInst->SrcReg[0].Index) + { + case VERT_ATTRIB_TEX0: + case VERT_ATTRIB_TEX1: + case VERT_ATTRIB_TEX2: + case VERT_ATTRIB_TEX3: + case VERT_ATTRIB_TEX4: + case VERT_ATTRIB_TEX5: + case VERT_ATTRIB_TEX6: + case VERT_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + } + else + { + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_WPOS: + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + } + + if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { + bValidTexCoord = GL_TRUE; pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; - case FRAG_ATTRIB_FACE: - fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); - break; - case FRAG_ATTRIB_PNTC: - fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); - break; - } - - if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || - (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) - { - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = - pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; + } } - break; + break; } } @@ -1445,8 +1518,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + tex_instruction_ptr->m_Word0.f.alt_const = 0; - tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + if(SPT_VP == pAsm->currentShaderType) + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX; + pAsm->unVetTexBits |= 1 << texture_unit_source->reg; + } + else + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + } tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; if (normalized) { @@ -1465,7 +1547,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word2.f.offset_x = 0x0; tex_instruction_ptr->m_Word2.f.offset_y = 0x0; tex_instruction_ptr->m_Word2.f.offset_z = 0x0; - tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg; // dst @@ -1724,7 +1805,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, } else { - pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2); } // If this clause constains any instruction that is forward dependent on a TEX instruction, @@ -2001,7 +2082,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -2090,7 +2171,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -2159,6 +2240,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { + R700ALUInstruction * alu_instruction_ptr; + R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; + R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; + GLuint number_of_scalar_operations; GLboolean is_single_scalar_operation; GLuint scalar_channel_index; @@ -2167,7 +2252,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) int current_source_index; GLuint contiguous_slots_needed; - GLuint uNumSrc = r700GetNumOperands(pAsm); + GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); //GLuint channel_swizzle, j; //GLuint chan_counter[4] = {0, 0, 0, 0}; //PVSSRC * pSource[3]; @@ -2224,262 +2309,44 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) contiguous_slots_needed = 0; - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + if(!is_single_scalar_operation) { contiguous_slots_needed = 4; } + contiguous_slots_needed += pAsm->D2.dst2.literal_slots; + initialize(pAsm); for (scalar_channel_index=0; scalar_channel_index < number_of_scalar_operations; scalar_channel_index++) { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - - //src 0 - current_source_index = 0; - pcurrent_source = &(pAsm->S[0].src); - - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - - if (uNumSrc > 1) - { - // Process source 1 - current_source_index = 1; - pcurrent_source = &(pAsm->S[current_source_index].src); - - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - } - - //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X; - - if( (is_single_scalar_operation == GL_TRUE) - || (GL_TRUE == bSplitInst) ) - { - alu_instruction_ptr->m_Word0.f.last = 1; - } - else + if(scalar_channel_index == (number_of_scalar_operations-1)) { - alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; - } - - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; - - // dst - if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || - (pAsm->D.dst.rtype == DST_REG_OUT) ) - { - alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; - } - else - { - radeon_error("Only temp destination registers supported for ALU dest regs.\n"); - return GL_FALSE; - } - - alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype - - if ( is_single_scalar_operation == GL_TRUE ) - { - // Override scalar_channel_index since only one scalar value will be written - if(pAsm->D.dst.writex) - { - scalar_channel_index = 0; - } - else if(pAsm->D.dst.writey) - { - scalar_channel_index = 1; - } - else if(pAsm->D.dst.writez) - { - scalar_channel_index = 2; - } - else if(pAsm->D.dst.writew) + switch(pAsm->D2.dst2.literal_slots) { - scalar_channel_index = 3; - } - } - - alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - - alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; - - if (pAsm->D.dst.op3) - { - //op3 - - alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; - - //There's 3rd src for op3 - current_source_index = 2; - pcurrent_source = &(pAsm->S[current_source_index].src); - - if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - } - else - { - //op2 - if (pAsm->bR6xx) - { - alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; - - //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; - //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; - switch (scalar_channel_index) - { - case 0: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; - break; - case 1: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; - break; - case 2: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; - break; - case 3: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; - break; - default: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK; - break; - } - alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; - - //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; - //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - switch (scalar_channel_index) - { - case 0: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; - break; - case 1: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; - break; - case 2: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; - break; - case 3: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; - break; - default: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK; - break; - } - alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; - } - } - - if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { - return GL_FALSE; - } - - /* - * Judge the type of current instruction, is it vector or scalar - * instruction. - */ - if (is_single_scalar_operation) - { - if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - else - { - if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } + case 0: + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + break; + case 1: + alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); + Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; + break; + case 2: + alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); + Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; + break; + }; } - - contiguous_slots_needed = 0; - } - - return GL_TRUE; -} - -GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) -{ - GLuint number_of_scalar_operations; - GLboolean is_single_scalar_operation; - GLuint scalar_channel_index; - - PVSSRC * pcurrent_source; - int current_source_index; - GLuint contiguous_slots_needed; - - GLuint uNumSrc = r700GetNumOperands(pAsm); - - GLboolean bSplitInst = GL_FALSE; - - if (1 == pAsm->D.dst.math) - { - is_single_scalar_operation = GL_TRUE; - number_of_scalar_operations = 1; - } - else - { - is_single_scalar_operation = GL_FALSE; - number_of_scalar_operations = 4; - } - - contiguous_slots_needed = 0; - - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) - { - contiguous_slots_needed = 4; - } - - initialize(pAsm); - - for (scalar_channel_index=0; - scalar_channel_index < number_of_scalar_operations; - scalar_channel_index++) - { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) + else { - return GL_FALSE; + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); } - Init_R700ALUInstruction(alu_instruction_ptr); //src 0 current_source_index = 0; @@ -2489,7 +2356,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } @@ -2503,13 +2370,13 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; + alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2524,15 +2391,15 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; if(1 == pAsm->D.dst.predicated) { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; } else { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; } - + // dst if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || (pAsm->D.dst.rtype == DST_REG_OUT) ) @@ -2540,7 +2407,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; } else - { + { radeon_error("Only temp destination registers supported for ALU dest regs.\n"); return GL_FALSE; } @@ -2597,8 +2464,8 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; @@ -2626,8 +2493,8 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; @@ -2654,7 +2521,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) } if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { + { return GL_FALSE; } @@ -2665,272 +2532,19 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) if (is_single_scalar_operation) { if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { + { return GL_FALSE; } } else { if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { + { return GL_FALSE; } } - contiguous_slots_needed = 0; - } - - return GL_TRUE; -} - -GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) -{ - R700ALUInstruction * alu_instruction_ptr; - R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; - R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; - - GLuint number_of_scalar_operations; - GLboolean is_single_scalar_operation; - GLuint scalar_channel_index; - - GLuint contiguous_slots_needed; - GLuint lastInstruction; - GLuint not_masked[4]; - - GLuint uNumSrc = r700GetNumOperands(pAsm); - - GLboolean bSplitInst = GL_FALSE; - - number_of_scalar_operations = 0; - contiguous_slots_needed = 0; - - if(1 == pAsm->D.dst.writew) - { - lastInstruction = 3; - number_of_scalar_operations++; - not_masked[3] = 1; - } - else - { - not_masked[3] = 0; - } - if(1 == pAsm->D.dst.writez) - { - lastInstruction = 2; - number_of_scalar_operations++; - not_masked[2] = 1; - } - else - { - not_masked[2] = 0; - } - if(1 == pAsm->D.dst.writey) - { - lastInstruction = 1; - number_of_scalar_operations++; - not_masked[1] = 1; - } - else - { - not_masked[1] = 0; - } - if(1 == pAsm->D.dst.writex) - { - lastInstruction = 0; - number_of_scalar_operations++; - not_masked[0] = 1; - } - else - { - not_masked[0] = 0; - } - - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) - { - contiguous_slots_needed = 4; - } - else - { - contiguous_slots_needed = number_of_scalar_operations; - } - - if(1 == pAsm->D2.dst2.literal) - { - contiguous_slots_needed += 1; - } - else if(2 == pAsm->D2.dst2.literal) - { - contiguous_slots_needed += 2; - } - - initialize(pAsm); - - for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++) - { - if(0 == not_masked[scalar_channel_index]) - { - continue; - } - - if(scalar_channel_index == lastInstruction) - { - switch (pAsm->D2.dst2.literal) - { - case 0: - alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - break; - case 1: - alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); - if (alu_instruction_ptr_hl == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]); - alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; - break; - case 2: - alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); - if (alu_instruction_ptr_fl == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]); - alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; - break; - default: - break; - }; - } - else - { - alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - } - - //src 0 - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 0, - &(pAsm->S[0].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - - if (uNumSrc > 1) - { - // Process source 1 - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 1, - &(pAsm->S[1].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - } - - //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; - - if(scalar_channel_index == lastInstruction) - { - alu_instruction_ptr->m_Word0.f.last = 1; - } - - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - if(1 == pAsm->D.dst.predicated) - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0; - } - - // dst - if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || - (pAsm->D.dst.rtype == DST_REG_OUT) ) - { - alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; - } - else - { - radeon_error("Only temp destination registers supported for ALU dest regs.\n"); - return GL_FALSE; - } - - alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype - - alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - - alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; - - if (pAsm->D.dst.op3) - { - //op3 - alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; - - //There's 3rd src for op3 - if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 2, - &(pAsm->S[2].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - } - else - { - //op2 - if (pAsm->bR6xx) - { - alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; - alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; - alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; - } - } - - if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { - return GL_FALSE; - } - - if (1 == number_of_scalar_operations) - { - if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - else - { - if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - - contiguous_slots_needed -= 2; + contiguous_slots_needed -= 1; } return GL_TRUE; @@ -2987,69 +2601,8 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) pAsm->S[2].bits = 0; pAsm->is_tex = GL_FALSE; pAsm->need_tex_barrier = GL_FALSE; - - return GL_TRUE; -} - -GLboolean next_ins2(r700_AssemblerBase *pAsm) -{ - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - //ALU - if( GL_FALSE == assemble_alu_instruction2(pAsm) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - - if(pAsm->D.dst.rtype == DST_REG_OUT) - { - if(pAsm->D.dst.op3) - { - // There is no mask for OP3 instructions, so all channels are written - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; - } - else - { - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] - |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; - } - } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->D2.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; - pAsm->D2.bits = 0; - - return GL_TRUE; -} - -/* not work yet */ -GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) -{ - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - //ALU - if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->D2.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; + pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0; return GL_TRUE; } @@ -3282,9 +2835,44 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_COS(r700_AssemblerBase *pAsm) +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { - return assemble_math_function(pAsm, SQ_OP2_INST_COS); + int tmp; + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + assemble_src(pAsm, 0, -1); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; + next_ins(pAsm); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + + //TODO - replicate if more channels set in WriteMask + return GL_TRUE; + } GLboolean assemble_DOT(r700_AssemblerBase *pAsm) @@ -3554,10 +3142,13 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) { - checkop2(pAsm); + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if(pILInst->Opcode == OPCODE_KIL) + checkop1(pAsm); pAsm->D.dst.opcode = opcode; - pAsm->D.dst.math = 1; + //pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -3567,17 +3158,30 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if(pILInst->Opcode == OPCODE_KIL_NV) { - return GL_FALSE; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + neg_PVSSRC(&(pAsm->S[1].src)); } - - if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + else { - return GL_FALSE; + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + } - - if ( GL_FALSE == next_ins2(pAsm) ) + + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -4384,77 +3988,67 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); } -GLboolean assemble_SIN(r700_AssemblerBase *pAsm) -{ - return assemble_math_function(pAsm, SQ_OP2_INST_SIN); -} - GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { BITS tmp; - checkop1(pAsm); - - tmp = gethelpr(pAsm); - - // COS tmp.x, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_COS; - pAsm->D.dst.math = 1; + checkop1(pAsm); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; + tmp = gethelpr(pAsm); + /* tmp.x = src /2*PI */ + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + assemble_src(pAsm, 0, -1); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; - // SIN tmp.y, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_SIN; - pAsm->D.dst.math = 1; + next_ins(pAsm); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writey = 1; + // COS dst.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + assemble_dst(pAsm); + /* mask y */ + pAsm->D.dst.writey = 0; - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - // MOV dst.mask, tmp - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + // SIN dst.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = DST_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + assemble_dst(pAsm); + /* mask x */ + pAsm->D.dst.writex = 0; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlez = SQ_SEL_0; - pAsm->S[0].src.swizzlew = SQ_SEL_0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } @@ -4467,7 +4061,7 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) } pAsm->D.dst.opcode = opcode; - pAsm->D.dst.math = 1; + //pAsm->D.dst.math = 1; if( GL_FALSE == assemble_dst(pAsm) ) { @@ -4494,32 +4088,34 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) { - if( GL_FALSE == checkop2(pAsm) ) - { - return GL_FALSE; - } + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; pAsm->D.dst.predicated = 1; - pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->uHelpReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number; + pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7; + noneg_PVSSRC(&(pAsm->S[0].src)); - if( GL_FALSE == assemble_src(pAsm, 1, -1) ) - { - return GL_FALSE; - } + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = pAsm->uHelpReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -4626,22 +4222,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) need_barrier = GL_TRUE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) - { - case OPCODE_TEX: - break; - case OPCODE_TXB: - radeon_error("do not support TXB yet\n"); - return GL_FALSE; - break; - case OPCODE_TXP: - break; - default: - radeon_error("Internal error: bad texture op (not TEX)\n"); - return GL_FALSE; - break; - } - if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { GLuint tmp = gethelpr(pAsm); @@ -4719,24 +4299,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently - * have to do explicit instruction - */ - pAsm->D.dst.opcode = SQ_OP2_INST_MAX; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp1; - pAsm->D.dst.writez = 1; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp1; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].bits = pAsm->S[0].bits; - flipneg_PVSSRC(&(pAsm->S[1].src)); - - next_ins(pAsm); - /* tmp1.z = RCP_e(|tmp1.z|) */ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; pAsm->D.dst.math = 1; @@ -4749,13 +4311,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = tmp1; pAsm->S[0].src.swizzlex = SQ_SEL_Z; + pAsm->S[0].src.abs = 1; next_ins(pAsm); /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp - * also no support for imm constants, so add 1 here */ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; pAsm->D.dst.op3 = 1; @@ -4772,30 +4334,12 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[1].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + /* immediate c 1.5 */ + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1.5F; + pAsm->S[2].src.rtype = SRC_REC_LITERAL; pAsm->S[2].src.reg = tmp1; - setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); - - next_ins(pAsm); - - /* ADD the remaining .5 */ - pAsm->D.dst.opcode = SQ_OP2_INST_ADD; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp2; - pAsm->D.dst.writex = 1; - pAsm->D.dst.writey = 1; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp2; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 - noswizzle_PVSSRC(&(pAsm->S[1].src)); + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); next_ins(pAsm); @@ -4820,14 +4364,32 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + switch(pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + case OPCODE_DDX: + /* will these need WQM(1) on CF inst ? */ + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H; + break; + case OPCODE_DDY: + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V; + break; + case OPCODE_TXB: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; + break; + default: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + } + + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + pAsm->is_tex = GL_TRUE; if ( GL_TRUE == need_barrier ) { pAsm->need_tex_barrier = GL_TRUE; } // Set src1 to tex unit id - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; //No sw info from mesa compiler, so hard code here. @@ -5103,6 +4665,11 @@ GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; @@ -5247,6 +4814,11 @@ GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) GLboolean assemble_BRK(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_CONTINUE_BREAK + + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + unsigned int unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) { @@ -5313,6 +4885,10 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm) GLboolean assemble_CONT(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_CONTINUE_BREAK + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + unsigned int unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) { @@ -5471,7 +5047,7 @@ void add_return_inst(r700_AssemblerBase *pAsm) pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; } -GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift) { /* Put in sub */ if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize ) @@ -5486,7 +5062,7 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) pAsm->unSubArraySize += 10; } - pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex; + pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; @@ -5577,9 +5153,13 @@ GLboolean assemble_RET(r700_AssemblerBase *pAsm) GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLint nILindex, + GLuint uiIL_Shift, GLuint uiNumberInsts, - struct prog_instruction *pILInst) + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc) { + GLint uiIL_Offset; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; if(GL_FALSE == add_cf_instruction(pAsm) ) @@ -5612,8 +5192,12 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, pAsm->unCallerArraySize += 10; } - pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex; - pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + uiIL_Offset = nILindex + uiIL_Shift; + pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; + pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + + pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL; + pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; pAsm->unCallerArrayPointer++; @@ -5623,7 +5207,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLboolean bRet; for(j=0; j<pAsm->unSubArrayPointer; j++) { - if(nILindex == pAsm->subs[j].subIL_Offset) + if(uiIL_Offset == pAsm->subs[j].subIL_Offset) { /* compiled before */ max = pAsm->subs[j].unStackDepthMax @@ -5641,7 +5225,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; unSubID = pAsm->unSubArrayPointer; - bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm); + bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm); if(GL_TRUE == bRet) { @@ -5651,6 +5235,8 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, { pAsm->CALLSTACK[pAsm->CALLSP].max = max; } + + pAsm->subs[unSubID].pPresubDesc = pPresubDesc; } return bRet; @@ -5668,11 +5254,12 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.literal_slots = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 0; /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */ + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ #if 0 pAsm->S[0].src.rtype = SRC_REC_LITERAL; //pAsm->S[0].src.reg = 0; @@ -5697,7 +5284,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->S[0].src.swizzlez = flagValue; pAsm->S[0].src.swizzlew = flagValue; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -5722,9 +5309,10 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.literal_slots = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 1; + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ pAsm->S[0].src.rtype = DST_REG_TEMPORARY; pAsm->S[0].src.reg = pAsm->flag_reg_index; @@ -5758,7 +5346,7 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) pAsm->S[1].src.swizzlez = SQ_SEL_1; pAsm->S[1].src.swizzlew = SQ_SEL_1; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -5814,6 +5402,7 @@ GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) } GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode) @@ -5853,6 +5442,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst, } } #endif + if(pILInst[i].CondUpdate == 1) + { + /* remember dest register used for cond evaluation */ + /* XXX also handle PROGRAM_OUTPUT registers here? */ + pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; + } switch (pILInst[i].Opcode) { @@ -5881,7 +5476,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_COS: - if ( GL_FALSE == assemble_COS(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) return GL_FALSE; break; @@ -5923,9 +5518,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_KIL: case OPCODE_KIL_NV: - /* done at OPCODE_SE/SGT...etc. */ - /* if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) - return GL_FALSE; */ + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) + return GL_FALSE; break; case OPCODE_LG2: if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) @@ -5964,6 +5558,26 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_MUL: if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) return GL_FALSE; + break; + + case OPCODE_NOISE1: + { + callPreSub(pR700AsmCode, + GLSL_NOISE1, + &noise1_presub, + pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, + 1); + radeon_error("noise1: not yet supported shader instruction\n"); + }; + break; + case OPCODE_NOISE2: + radeon_error("noise2: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE3: + radeon_error("noise3: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE4: + radeon_error("noise4: not yet supported shader instruction\n"); break; case OPCODE_POW: @@ -5979,7 +5593,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_SIN: - if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) return GL_FALSE; break; case OPCODE_SCS: @@ -5988,151 +5602,23 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_SEQ: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; case OPCODE_SGT: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; case OPCODE_SGE: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) - { - return GL_FALSE; - } + if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + { + return GL_FALSE; } break; @@ -6144,61 +5630,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst, SrcRegSave[1] = pILInst[i].SrcReg[1]; pILInst[i].SrcReg[0] = SrcRegSave[1]; pILInst[i].SrcReg[1] = SrcRegSave[0]; - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) - { - return GL_FALSE; - } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } pILInst[i].SrcReg[0] = SrcRegSave[0]; pILInst[i].SrcReg[1] = SrcRegSave[1]; } @@ -6211,60 +5648,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, SrcRegSave[1] = pILInst[i].SrcReg[1]; pILInst[i].SrcReg[0] = SrcRegSave[1]; pILInst[i].SrcReg[1] = SrcRegSave[0]; - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) ) - { - return GL_FALSE; - } - } - else + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; } pILInst[i].SrcReg[0] = SrcRegSave[0]; pILInst[i].SrcReg[1] = SrcRegSave[1]; @@ -6272,51 +5660,9 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_SNE: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLNE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; @@ -6344,7 +5690,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, } } break; - + case OPCODE_DDX: + case OPCODE_DDY: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: @@ -6417,7 +5764,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_BGNSUB: - if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) ) + if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) ) { return GL_FALSE; } @@ -6432,9 +5779,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_CAL: if( GL_FALSE == assemble_CAL(pR700AsmCode, - pILInst[i].BranchTarget, + pILInst[i].BranchTarget, + uiIL_Shift, uiNumberInsts, - pILInst) ) + pILInst, + NULL) ) { return GL_FALSE; } @@ -6471,7 +5820,7 @@ GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) return GL_TRUE; } -GLboolean RelocProgram(r700_AssemblerBase * pAsm) +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) { GLuint i; GLuint unCFoffset; @@ -6481,6 +5830,12 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) R700ShaderInstruction * pInst; R700ControlFlowGenericClause * pCFInst; + R700ControlFlowALUClause * pCF_ALU; + R700ALUInstruction * pALU; + GLuint unConstOffset = 0; + GLuint unRegOffset; + GLuint unMinRegIndex; + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; /* remove flags init if they are not used */ @@ -6526,6 +5881,11 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) unCFoffset = plstCFmain->uNumOfNode; + if(NULL != pILProg->Parameters) + { + unConstOffset = pILProg->Parameters->NumParameters; + } + /* Reloc subs */ for(i=0; i<pAsm->unSubArrayPointer; i++) { @@ -6563,6 +5923,84 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) pInst = pInst->pNextInst; }; + if(NULL != pAsm->subs[i].pPresubDesc) + { + GLuint uNumSrc; + + unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg; + unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart; + + pInst = plstCFsub->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + + if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src0_sel += unConstOffset; + } + + if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) + >= SQ_OP3_INST_MUL_LIT ) + { /* op3 : 3 srcs */ + if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word1_OP3.f.src2_sel += unConstOffset; + } + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + else + { + if(pAsm->bR6xx) + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + } + else + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + } + if(2 == uNumSrc) + { /* 2 srcs */ + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + } + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + pInst = pInst->pNextInst; + }; + } + /* Put sub into main */ plstCFmain->pTail->pNextInst = plstCFsub->pHead; plstCFmain->pTail = plstCFsub->pTail; @@ -6576,11 +6014,216 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) { pAsm->callers[i].cf_ptr->m_Word0.f.addr = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; + + if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc) + { + unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg; + + if(NULL != pAsm->callers[i].prelude_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + if(NULL != pAsm->callers[i].finale_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + } } return GL_TRUE; } +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc) +{ + /* save assemble context */ + GLuint starting_temp_register_number_save; + GLuint number_used_registers_save; + GLuint uFirstHelpReg_save; + GLuint uHelpReg_save; + GLuint uiCurInst_save; + struct prog_instruction *pILInst_save; + PRESUB_DESC * pPresubDesc; + GLboolean bRet; + int i; + + R700ControlFlowGenericClause* prelude_cf_ptr = NULL; + + /* copy srcs to presub inputs */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + for(i=0; i<uNumValidSrc; i++) + { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i]; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, i, 0) ) + { + return GL_FALSE; + } + + next_ins(pAsm); + } + if(uNumValidSrc > 0) + { + prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + /* browse thro existing presubs. */ + for(i=0; i<pAsm->unNumPresub; i++) + { + if(pAsm->presubs[i].sptSigniture == scriptSigniture) + { + break; + } + } + + if(i == pAsm->unNumPresub) + { /* not loaded yet */ + /* save assemble context */ + number_used_registers_save = pAsm->number_used_registers; + uFirstHelpReg_save = pAsm->uFirstHelpReg; + uHelpReg_save = pAsm->uHelpReg; + starting_temp_register_number_save = pAsm->starting_temp_register_number; + pILInst_save = pAsm->pILInst; + uiCurInst_save = pAsm->uiCurInst; + + /* alloc in presub */ + if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize ) + { + pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs, + sizeof(PRESUB_DESC) * pAsm->unPresubArraySize, + sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) ); + if(NULL == pAsm->presubs) + { + radeon_error("No memeory to allocate built in shader function description structures. \n"); + return GL_FALSE; + } + pAsm->unPresubArraySize += 4; + } + + pPresubDesc = &(pAsm->presubs[i]); + pPresubDesc->sptSigniture = scriptSigniture; + + /* constants offsets need to be final resolved at reloc. */ + if(0 == pAsm->unNumPresub) + { + pPresubDesc->unConstantsStart = 0; + } + else + { + pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart + + pAsm->presubs[i-1].pCompiledSub->NumParameters; + } + + pPresubDesc->pCompiledSub = pCompiledSub; + + pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts; + pPresubDesc->maxStartReg = uFirstHelpReg_save; + pAsm->unCurNumILInsts += pCompiledSub->NumInstructions; + + pAsm->unNumPresub++; + + /* setup new assemble context */ + pAsm->starting_temp_register_number = 0; + pAsm->number_used_registers = pCompiledSub->NumTemporaries; + pAsm->uFirstHelpReg = pAsm->number_used_registers; + pAsm->uHelpReg = pAsm->uFirstHelpReg; + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + + + pPresubDesc->number_used_registers = pAsm->number_used_registers; + + /* restore assemble context */ + pAsm->number_used_registers = number_used_registers_save; + pAsm->uFirstHelpReg = uFirstHelpReg_save; + pAsm->uHelpReg = uHelpReg_save; + pAsm->starting_temp_register_number = starting_temp_register_number_save; + pAsm->pILInst = pILInst_save; + pAsm->uiCurInst = uiCurInst_save; + } + else + { /* was loaded */ + pPresubDesc = &(pAsm->presubs[i]); + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + } + + if(GL_FALSE == bRet) + { + radeon_error("Shader presub assemble failed. \n"); + } + else + { + /* copy presub output to real dst */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pCompiledSub->dstRegIndex; + pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX; + pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY; + pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ; + pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW; + + next_ins(pAsm); + + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers ) + { + pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg; + } + if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg) + { + pPresubDesc->maxStartReg = pAsm->uFirstHelpReg; + } + + return bRet; +} + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -6938,6 +6581,11 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) FREE(pR700AsmCode->callers); } + if(NULL != pR700AsmCode->presubs) + { + FREE(pR700AsmCode->presubs); + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 130fc89dae..dbd9860f7d 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -34,6 +34,45 @@ #include "r700_shaderinst.h" #include "r700_shader.h" +typedef enum LOADABLE_SCRIPT_SIGNITURE +{ + GLSL_NOISE1 = 0x10000001, + GLSL_NOISE2 = 0x10000002, + GLSL_NOISE3 = 0x10000003, + GLSL_NOISE4 = 0x10000004 +}LOADABLE_SCRIPT_SIGNITURE; + +typedef struct COMPILED_SUB +{ + struct prog_instruction *Instructions; + GLuint NumInstructions; + GLuint NumTemporaries; + GLuint NumParameters; + GLuint MinRegIndex; + GLfloat (*ParameterValues)[4]; + GLbyte outputSwizzleX; + GLbyte outputSwizzleY; + GLbyte outputSwizzleZ; + GLbyte outputSwizzleW; + GLshort srcRegIndex[3]; + GLushort dstRegIndex; +}COMPILED_SUB; + +typedef struct PRESUB_DESCtag +{ + LOADABLE_SCRIPT_SIGNITURE sptSigniture; + GLint subIL_Shift; + struct prog_src_register InReg[3]; + struct prog_dst_register OutReg; + + GLushort maxStartReg; + GLushort number_used_registers; + + GLuint unConstantsStart; + + COMPILED_SUB * pCompiledSub; +} PRESUB_DESC; + typedef enum SHADER_PIPE_TYPE { SPT_VP = 0, @@ -114,20 +153,22 @@ typedef struct PVSDSTtag typedef struct PVSINSTtag { - BITS literal :2; + BITS literal_slots :2; BITS SaturateMode :2; + BITS index_mode :3; } PVSINST; typedef struct PVSSRCtag { - BITS rtype:4; + BITS rtype:3; BITS addrmode0:1; - BITS reg:10; //15 (8) + BITS reg:10; //14 (8) BITS swizzlex:3; BITS swizzley:3; BITS swizzlez:3; - BITS swizzlew:3; //27 + BITS swizzlew:3; //26 + BITS abs:1; BITS negx:1; BITS negy:1; BITS negz:1; @@ -294,6 +335,7 @@ typedef struct SUB_OFFSET GLint subIL_Offset; GLuint unCFoffset; GLuint unStackDepthMax; + PRESUB_DESC * pPresubDesc; TypedShaderList lstCFInstructions_local; } SUB_OFFSET; @@ -302,6 +344,9 @@ typedef struct CALLER_POINTER GLint subIL_Offset; GLint subDescIndex; R700ControlFlowGenericClause* cf_ptr; + + R700ControlFlowGenericClause* prelude_cf_ptr; + R700ControlFlowGenericClause* finale_cf_ptr; } CALLER_POINTER; #define SQ_MAX_CALL_DEPTH 0x00000020 @@ -343,8 +388,10 @@ typedef struct r700_AssemblerBase PVSDWORD D; PVSDWORD D2; PVSDWORD S[3]; + PVSDWORD C[4]; unsigned int uLastPosUpdate; + unsigned int last_cond_register; OUT_FRAGMENT_FMT_0 fp_stOutFmt0; @@ -415,6 +462,7 @@ typedef struct r700_AssemblerBase SHADER_PIPE_TYPE currentShaderType; struct prog_instruction * pILInst; GLuint uiCurInst; + GLubyte SamplerUnits[MAX_SAMPLERS]; GLboolean bR6xx; /* helper to decide which type of instruction to assemble */ GLboolean is_tex; @@ -432,6 +480,13 @@ typedef struct r700_AssemblerBase GLuint unCFflags; + PRESUB_DESC * presubs; + GLuint unPresubArraySize; + GLuint unNumPresub; + GLuint unCurNumILInsts; + + GLuint unVetTexBits; + } r700_AssemblerBase; //Internal use @@ -453,7 +508,7 @@ BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ; GLboolean is_reduction_opcode(PVSDWORD * dest); GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size); -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm); +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3); GLboolean IsTex(gl_inst_opcode Opcode); GLboolean IsAlu(gl_inst_opcode Opcode); @@ -526,13 +581,6 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); GLboolean next_ins(r700_AssemblerBase *pAsm); -GLboolean next_ins2(r700_AssemblerBase *pAsm); -GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm); - -/* TODO : merge next_ins/2/literal, assemble_alu_instruction/2/literal */ -GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); -GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); - GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops); GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset); GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue); @@ -546,7 +594,6 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm); GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); -GLboolean assemble_COS(r700_AssemblerBase *pAsm); GLboolean assemble_DOT(r700_AssemblerBase *pAsm); GLboolean assemble_DST(r700_AssemblerBase *pAsm); GLboolean assemble_EX2(r700_AssemblerBase *pAsm); @@ -567,12 +614,12 @@ GLboolean assemble_MUL(r700_AssemblerBase *pAsm); GLboolean assemble_POW(r700_AssemblerBase *pAsm); GLboolean assemble_RCP(r700_AssemblerBase *pAsm); GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); -GLboolean assemble_SIN(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode); GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode); GLboolean assemble_SLT(r700_AssemblerBase *pAsm); GLboolean assemble_STP(r700_AssemblerBase *pAsm); @@ -588,13 +635,15 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm); GLboolean assemble_COND(r700_AssemblerBase *pAsm); GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm); -GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex); +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift); GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm); GLboolean assemble_RET(r700_AssemblerBase *pAsm); GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLint nILindex, + GLuint uiIL_Offest, GLuint uiNumberInsts, - struct prog_instruction *pILInst); + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc); GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, @@ -605,16 +654,23 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select); +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + /* struct prog_instruction ** pILInstParent, */ + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc); //Interface GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode); GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); -GLboolean RelocProgram(r700_AssemblerBase * pAsm); +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg); GLboolean InitShaderProgram(r700_AssemblerBase * pAsm); int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 8126777bf4..c124e02184 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -45,6 +45,9 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + + struct r700_vertex_program *vp = context->selected_vp; + struct radeon_bo *bo = NULL; unsigned int i; BATCH_LOCALS(&context->radeon); @@ -52,7 +55,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; uint32_t offset; if (t) { @@ -71,7 +74,16 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) BEGIN_BATCH_NO_AUTOSTATE(9 + 4); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH(i * 7); + + if( (1<<i) & vp->r700AsmCode.unVetTexBits ) + { /* vs texture */ + R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + } + else + { + R600_OUT_BATCH(i * 7); + } + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); @@ -95,21 +107,35 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) } } +#define SAMPLER_STRIDE 3 + static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); unsigned int i; + + struct r700_vertex_program *vp = context->selected_vp; + BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; if (t) { BEGIN_BATCH_NO_AUTOSTATE(5); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); - R600_OUT_BATCH(i * 3); + + if( (1<<i) & vp->r700AsmCode.unVetTexBits ) + { /* vs texture */ + R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1 + } + else + { + R600_OUT_BATCH(i * 3); + } + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); @@ -1163,7 +1189,11 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom) count += 3; if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { - for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) { + /* targets are enabled in r700SetRenderTarget but state + size is calculated before that. Until MRT's are done + hardcode target0 as enabled. */ + count += 3; + for (ui = 1; ui < R700_MAX_RENDER_TARGETS; ui++) { if (r700->render_target[ui].enabled) count += 3; } @@ -1306,9 +1336,9 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(poly, always, 10, r700SendPolyState); ALLOC_STATE(cb, cb, 18, r700SendCBState); ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); + ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); - ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); ALLOC_STATE(sx, always, 9, r700SendSXState); ALLOC_STATE(vgt, always, 41, r700SendVGTState); ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index e9ef6c8695..ca0710b681 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -34,6 +34,7 @@ #include "main/imports.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" +#include "shader/program.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -42,6 +43,54 @@ #include "r700_debug.h" +void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog) +{ + static const gl_state_index winstate[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0}; + struct prog_instruction *newInst, *inst; + GLint win_size; /* state reference */ + GLuint wpos_temp; /* temp register */ + int i, j; + + /* PARAM win_size = STATE_FB_SIZE */ + win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate); + + wpos_temp = fprog->Base.NumTemporaries++; + + /* scan program where WPOS is used and replace with wpos_temp */ + inst = fprog->Base.Instructions; + for (i = 0; i < fprog->Base.NumInstructions; i++) { + for (j=0; j < 3; j++) { + if(inst->SrcReg[j].File == PROGRAM_INPUT && + inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) { + inst->SrcReg[j].File = PROGRAM_TEMPORARY; + inst->SrcReg[j].Index = wpos_temp; + } + } + inst++; + } + + _mesa_insert_instructions(&(fprog->Base), 0, 1); + + newInst = fprog->Base.Instructions; + /* invert wpos.y + * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */ + newInst[0].Opcode = OPCODE_ADD; + newInst[0].DstReg.File = PROGRAM_TEMPORARY; + newInst[0].DstReg.Index = wpos_temp; + newInst[0].DstReg.WriteMask = WRITEMASK_XYZW; + + newInst[0].SrcReg[0].File = PROGRAM_INPUT; + newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW; + newInst[0].SrcReg[0].Negate = NEGATE_Y; + + newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR; + newInst[0].SrcReg[1].Index = win_size; + newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); + +} + //TODO : Validate FP input with VP output. void Map_Fragment_Program(r700_AssemblerBase *pAsm, struct gl_fragment_program *mesa_fp, @@ -93,7 +142,7 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++; } } - + /* order has been taken care of */ #if 1 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) @@ -149,6 +198,17 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->number_used_registers += unMaxVarying + 1; } #endif + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++; + } + + unBit = 1 << FRAG_ATTRIB_PNTC; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++; + } /* Map temporary registers (GPRs) */ pAsm->starting_temp_register_number = pAsm->number_used_registers; @@ -303,10 +363,17 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, GLuint number_of_colors_exported; GLboolean z_enabled = GL_FALSE; GLuint unBit; + int i; //Init_Program Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); - Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); + + if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS) + { + insert_wpos_code(ctx, mesa_fp); + } + + Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) ) { @@ -315,7 +382,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, InitShaderProgram(&(fp->r700AsmCode)); + for(i=0; i < MAX_SAMPLERS; i++) + { + fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i]; + } + + fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions; + if( GL_FALSE == AssembleInstr(0, + 0, mesa_fp->Base.NumInstructions, &(mesa_fp->Base.Instructions[0]), &(fp->r700AsmCode)) ) @@ -328,7 +403,7 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, return GL_FALSE; } - if( GL_FALSE == RelocProgram(&(fp->r700AsmCode)) ) + if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) ) { return GL_FALSE; } @@ -451,6 +526,35 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE)) + { + ui += 1; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit); + SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask); + } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + } + + + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) + { + ui++; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); + SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask); + SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask); + //SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit); + } + else + { + CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); + } + + ui = (unNumOfReg < ui) ? ui : unNumOfReg; SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); @@ -470,6 +574,10 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + + for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++) + r700->SPI_PS_INPUT_CNTL[ui].u32All = 0; + unBit = 1 << FRAG_ATTRIB_WPOS; if(mesa_fp->Base.InputsRead & unBit) { @@ -535,6 +643,35 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } } + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + unBit = 1 << FRAG_ATTRIB_PNTC; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); + } + + + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) { unBit = 1 << i; @@ -583,6 +720,25 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } else r700->ps.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->ps.num_consts; + for(ui=0; ui<pAsm->unNumPresub; ui++) + { + pCompiledSub = pAsm->presubs[ui].pCompiledSub; + + r700->ps.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h index e562bfa478..39c59c9201 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.h +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -48,6 +48,8 @@ struct r700_fragment_program }; /* Internal */ +void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog); + void Map_Fragment_Program(r700_AssemblerBase *pAsm, struct gl_fragment_program *mesa_fp, GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.h b/src/mesa/drivers/dri/r600/r700_shaderinst.h index 2829cca0a3..cdb9a570f7 100644 --- a/src/mesa/drivers/dri/r600/r700_shaderinst.h +++ b/src/mesa/drivers/dri/r600/r700_shaderinst.h @@ -42,6 +42,13 @@ #define SQ_FETCH_RESOURCE_VS_OFFSET 0x000000a0 #define SQ_FETCH_RESOURCE_VS_COUNT 0x000000b0 +//richard dec.10 glsl +#define SQ_TEX_SAMPLER_PS_OFFSET 0x00000000 +#define SQ_TEX_SAMPLER_PS_COUNT 0x00000012 +#define SQ_TEX_SAMPLER_VS_OFFSET 0x00000012 +#define SQ_TEX_SAMPLER_VS_COUNT 0x00000012 +//------------------- + #define SHADERINST_TYPEMASK_CF 0x10 #define SHADERINST_TYPEMASK_ALU 0x20 #define SHADERINST_TYPEMASK_TEX 0x40 diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index d3d1da7959..90fac078ff 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -337,7 +337,15 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, InitShaderProgram(&(vp->r700AsmCode)); + for(i=0; i < MAX_SAMPLERS; i++) + { + vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i]; + } + + vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions; + if(GL_FALSE == AssembleInstr(0, + 0, vp->mesa_program->Base.NumInstructions, &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) @@ -350,7 +358,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return NULL; } - if( GL_FALSE == RelocProgram(&(vp->r700AsmCode)) ) + if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) ) { return GL_FALSE; } @@ -667,5 +675,24 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) } else r700->vs.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->vs.num_consts; + for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++) + { + pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub; + + r700->vs.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } |