From 44c6c20b69839ea130a255496f5f692186b68793 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 9 Oct 2009 10:46:12 +0300 Subject: r600: fixup KIL instruction a bit - KILLGT takes 2 arguments - arb KIL has no dst register - add TODO about clause ending but currently piglit fp-kil passes and does not hang the card --- src/mesa/drivers/dri/r600/r700_assembler.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 903b6968be..fefae22ba7 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -336,7 +336,8 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) switch (pAsm->D.dst.opcode) { - case SQ_OP2_INST_ADD: + case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLGT: case SQ_OP2_INST_MUL: case SQ_OP2_INST_MAX: case SQ_OP2_INST_MIN: @@ -356,7 +357,6 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_MOV: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: - case SQ_OP2_INST_KILLGT: case SQ_OP2_INST_EXP_IEEE: case SQ_OP2_INST_LOG_CLAMPED: case SQ_OP2_INST_LOG_IEEE: @@ -2744,15 +2744,15 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) GLboolean assemble_KIL(r700_AssemblerBase *pAsm) { + /* TODO: doc says KILL has to be last(end) ALU clause */ + checkop1(pAsm); pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; - - if ( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; @@ -2765,20 +2765,11 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm) setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - - if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File) + if ( GL_FALSE == assemble_src(pAsm, 0, 1) ) { - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; - } - else - { //PROGRAM_OUTPUT - pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index]; + return GL_FALSE; } - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - noswizzle_PVSSRC(&(pAsm->S[1].src)); - if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; -- cgit v1.2.3 From 97dd35bd6f2e2654b96923fd06bf9761e7b2269d Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 12 Oct 2009 12:20:26 +0300 Subject: r600: DPH adds w comp of second operand, so set first one to 1 instead --- src/mesa/drivers/dri/r600/r700_assembler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index fefae22ba7..a1331fdfd2 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2635,7 +2635,7 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm) } else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) { - onecomp_PVSSRC(&(pAsm->S[1].src), 3); + onecomp_PVSSRC(&(pAsm->S[0].src), 3); } if ( GL_FALSE == next_ins(pAsm) ) -- cgit v1.2.3 From da66d9e12d339c5c6df08ea0bd11a550c9c57b36 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 12 Oct 2009 12:58:40 +0300 Subject: r600: LIT dst.y gets value from src.x seems I overlooked this when removing hardcoded swizzles for this one previously --- src/mesa/drivers/dri/r600/r700_assembler.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index a1331fdfd2..ed597c027b 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3026,6 +3026,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); -- cgit v1.2.3 From 95851d8cb232cbd1312d2b8de471ba2aeb276911 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 9 Oct 2009 10:46:12 +0300 Subject: r600: fixup KIL instruction a bit - KILLGT takes 2 arguments - arb KIL has no dst register - add TODO about clause ending but currently piglit fp-kil passes and does not hang the card --- src/mesa/drivers/dri/r600/r700_assembler.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 00eda544d4..e79c256c6b 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -336,7 +336,8 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) switch (pAsm->D.dst.opcode) { - case SQ_OP2_INST_ADD: + case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLGT: case SQ_OP2_INST_MUL: case SQ_OP2_INST_MAX: case SQ_OP2_INST_MIN: @@ -356,7 +357,6 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_MOV: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: - case SQ_OP2_INST_KILLGT: case SQ_OP2_INST_EXP_IEEE: case SQ_OP2_INST_LOG_CLAMPED: case SQ_OP2_INST_LOG_IEEE: @@ -2617,15 +2617,15 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) GLboolean assemble_KIL(r700_AssemblerBase *pAsm) { + /* TODO: doc says KILL has to be last(end) ALU clause */ + checkop1(pAsm); pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; - - if ( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; @@ -2638,20 +2638,11 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm) setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - - if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File) + if ( GL_FALSE == assemble_src(pAsm, 0, 1) ) { - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; - } - else - { //PROGRAM_OUTPUT - pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index]; + return GL_FALSE; } - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - noswizzle_PVSSRC(&(pAsm->S[1].src)); - if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; -- cgit v1.2.3 From 74c31e5d05f7ed342fb143cb6f637b54e8961973 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 12 Oct 2009 12:20:26 +0300 Subject: r600: DPH adds w comp of second operand, so set first one to 1 instead --- src/mesa/drivers/dri/r600/r700_assembler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index e79c256c6b..1837712883 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2508,7 +2508,7 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm) } else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) { - onecomp_PVSSRC(&(pAsm->S[1].src), 3); + onecomp_PVSSRC(&(pAsm->S[0].src), 3); } if ( GL_FALSE == next_ins(pAsm) ) -- cgit v1.2.3 From 606becc7f3513354548f587d84db731046616401 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 12 Oct 2009 12:58:40 +0300 Subject: r600: LIT dst.y gets value from src.x seems I overlooked this when removing hardcoded swizzles for this one previously --- src/mesa/drivers/dri/r600/r700_assembler.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 1837712883..25b27cd829 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2899,6 +2899,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); -- cgit v1.2.3 From 16c6a3b71e6be04b6bb3d08fcb658194c5251fc7 Mon Sep 17 00:00:00 2001 From: Robert Noland Date: Wed, 14 Oct 2009 14:04:24 -0500 Subject: r600: FRAG_ATTRIB_WPOS and FRAG_ATTRIB_FOGC appear to be supported. Report unsupported attributes while I'm here. Signed-off-by: Robert Noland --- src/mesa/drivers/dri/r600/r700_assembler.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 25b27cd829..a683008746 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1180,8 +1180,10 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) case PROGRAM_INPUT: switch (pILInst->SrcReg[0].Index) { + case FRAG_ATTRIB_WPOS: case FRAG_ATTRIB_COL0: case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: case FRAG_ATTRIB_TEX0: case FRAG_ATTRIB_TEX1: case FRAG_ATTRIB_TEX2: @@ -1194,7 +1196,16 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + case FRAG_ATTRIB_VAR0: + fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n"); + break; } break; } -- cgit v1.2.3 From b86302283b48654682e0580c53ece01bf095fa95 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Tue, 20 Oct 2009 11:45:39 +0300 Subject: r600: add beginnings of ARL instruction --- src/mesa/drivers/dri/r600/r700_assembler.c | 35 +++++++++++++++++++++++++++--- src/mesa/drivers/dri/r600/r700_assembler.h | 1 + 2 files changed, 33 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index a683008746..d0eb9949a6 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -355,6 +355,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 2; case SQ_OP2_INST_MOV: + case SQ_OP2_INST_MOVA_FLOOR: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: case SQ_OP2_INST_EXP_IEEE: @@ -2064,7 +2065,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; + alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2398,6 +2399,35 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm) return GL_TRUE; } +GLboolean assemble_ARL(r700_AssemblerBase *pAsm) +{ /* TODO: ar values dont' persist between clauses */ + if( GL_FALSE == checkop1(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean assemble_BAD(char *opcode_str) { radeon_error("Not yet implemented instruction (%s)\n", opcode_str); @@ -3812,8 +3842,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_ARL: - radeon_error("Not yet implemented instruction OPCODE_ARL \n"); - //if ( GL_FALSE == assemble_BAD("ARL") ) + if ( GL_FALSE == assemble_ARL(pR700AsmCode) ) return GL_FALSE; break; case OPCODE_ARR: diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 73bb8bac55..d639592702 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -461,6 +461,7 @@ GLboolean next_ins(r700_AssemblerBase *pAsm); GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); GLboolean assemble_ABS(r700_AssemblerBase *pAsm); GLboolean assemble_ADD(r700_AssemblerBase *pAsm); +GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); GLboolean assemble_COS(r700_AssemblerBase *pAsm); -- cgit v1.2.3 From a88c9296cb079ff42ef901113d0fe772228e6feb Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Wed, 21 Oct 2009 12:23:27 +0300 Subject: r600: need to export something from PS Also avoids empty shader for "END" - seems to be somewhat valid fp Maybe this can be done differently in the future (fake FRAG_RESULT_COLOR already in Map_Fragment_Program() or is there a way to program the chip to not hang in case of no exports. --- src/mesa/drivers/dri/r600/r700_assembler.c | 11 ++++++++++- src/mesa/drivers/dri/r600/r700_fragprog.c | 10 +++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index d0eb9949a6..6ff08e1cfb 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4187,6 +4187,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten) { unsigned int unBit; + GLuint export_count = 0; if(pR700AsmCode->depth_export_register_number >= 0) { @@ -4208,6 +4209,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, { return GL_FALSE; } + export_count++; } unBit = 1 << FRAG_RESULT_DEPTH; if(OutputsWritten & unBit) @@ -4221,8 +4223,15 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, { return GL_FALSE; } + export_count++; } - + /* Need to export something, otherwise we'll hang + * results are undefined anyway */ + if(export_count == 0) + { + Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE); + } + if(pR700AsmCode->cf_last_export_ptr != NULL) { pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 62a1ea1a22..3736bce11c 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -251,7 +251,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, number_of_colors_exported--; } - fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + /* illegal to set this to 0 */ + if(number_of_colors_exported || z_enabled) + { + fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + } + else + { + fp->r700Shader.exportMode = (1 << 1); + } fp->translated = GL_TRUE; -- cgit v1.2.3 From d00cbba403640c82683a876fa795cd638f1bbc24 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sun, 1 Nov 2009 21:38:48 +0100 Subject: r600: implement EXP op in compiler --- src/mesa/drivers/dri/r600/r700_assembler.c | 132 ++++++++++++++++++++++++++++- src/mesa/drivers/dri/r600/r700_assembler.h | 1 + 2 files changed, 130 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 917318c02a..fbf1f29fa3 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2729,6 +2729,133 @@ GLboolean assemble_EX2(r700_AssemblerBase *pAsm) { return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE); } + +GLboolean assemble_EXP(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // FLOOR tmp.x, a.x + // EX2 dst.x tmp.x + + if (pAsm->pILInst->DstReg.WriteMask & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // FRACT dst.y a.x + + if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // EX2 dst.z, a.x + + if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // MOV dst.w 1.0 + + if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} GLboolean assemble_FLR(r700_AssemblerBase *pAsm) { @@ -4004,10 +4131,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_EXP: - radeon_error("Not yet implemented instruction OPCODE_EXP \n"); - //if ( GL_FALSE == assemble_BAD("EXP") ) + if ( GL_FALSE == assemble_EXP(pR700AsmCode) ) return GL_FALSE; - break; // approx of EX2 + break; case OPCODE_FLR: if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 8cbca066e9..317feb1b7d 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -477,6 +477,7 @@ GLboolean assemble_COS(r700_AssemblerBase *pAsm); GLboolean assemble_DOT(r700_AssemblerBase *pAsm); GLboolean assemble_DST(r700_AssemblerBase *pAsm); GLboolean assemble_EX2(r700_AssemblerBase *pAsm); +GLboolean assemble_EXP(r700_AssemblerBase *pAsm); GLboolean assemble_FLR(r700_AssemblerBase *pAsm); GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm); GLboolean assemble_FRC(r700_AssemblerBase *pAsm); -- cgit v1.2.3 From 213ec8251cb3e859b41306eae4092d89592c33db Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sun, 1 Nov 2009 22:28:02 +0100 Subject: r600: implement LOG op in compiler --- src/mesa/drivers/dri/r600/r700_assembler.c | 216 ++++++++++++++++++++++++++++- src/mesa/drivers/dri/r600/r700_assembler.h | 1 + 2 files changed, 214 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index fbf1f29fa3..e0d7d4fa6b 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3037,6 +3037,217 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm) return GL_TRUE; } +GLboolean assemble_LOG(r700_AssemblerBase *pAsm) +{ + BITS tmp1, tmp2, tmp3; + + checkop1(pAsm); + + tmp1 = gethelpr(pAsm); + tmp2 = gethelpr(pAsm); + tmp3 = gethelpr(pAsm); + + // FIXME: The hardware can do fabs() directly on input + // elements, but the compiler doesn't have the + // capability to use that. + + // MAX tmp1.x, a.x, -a.x (fabs(a.x)) + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Entire algo: + // + // LG2 tmp2.x, tmp1.x + // FLOOR tmp3.x, tmp2.x + // MOV dst.x, tmp3.x + // ADD tmp3.x, tmp2.x, -tmp3.x + // EX2 dst.y, tmp3.x + // MOV dst.z, tmp2.x + // MOV dst.w, 1.0 + + // LG2 tmp2.x, tmp1.x + // FLOOR tmp3.x, tmp2.x + + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp3; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.x, tmp3.x + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // ADD tmp3.x, tmp2.x, -tmp3.x + // EX2 dst.y, tmp3.x + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp3; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = DST_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + neg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.z, tmp2.x + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.w 1.0 + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) { int tmp, ii; @@ -4166,10 +4377,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_LOG: - radeon_error("Not yet implemented instruction OPCODE_LOG \n"); - //if ( GL_FALSE == assemble_BAD("LOG") ) + if ( GL_FALSE == assemble_LOG(pR700AsmCode) ) return GL_FALSE; - break; // approx of LG2 + break; case OPCODE_MAD: if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 317feb1b7d..c66db502a1 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -484,6 +484,7 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm); GLboolean assemble_KIL(r700_AssemblerBase *pAsm); GLboolean assemble_LG2(r700_AssemblerBase *pAsm); GLboolean assemble_LRP(r700_AssemblerBase *pAsm); +GLboolean assemble_LOG(r700_AssemblerBase *pAsm); GLboolean assemble_MAD(r700_AssemblerBase *pAsm); GLboolean assemble_LIT(r700_AssemblerBase *pAsm); GLboolean assemble_MAX(r700_AssemblerBase *pAsm); -- cgit v1.2.3 From 3132853e1242607d5ff62785cd7dad5ef3a783d0 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Tue, 17 Nov 2009 16:25:02 -0500 Subject: r600 : Initial version of glsl fc. --- src/mesa/drivers/dri/r600/r700_assembler.c | 2572 ++++++++++++++++++++++++---- src/mesa/drivers/dri/r600/r700_assembler.h | 111 +- src/mesa/drivers/dri/r600/r700_chip.c | 99 +- src/mesa/drivers/dri/r600/r700_fragprog.c | 94 +- src/mesa/drivers/dri/r600/r700_shader.c | 9 +- src/mesa/drivers/dri/r600/r700_shader.h | 3 +- src/mesa/drivers/dri/r600/r700_vertprog.c | 21 +- 7 files changed, 2507 insertions(+), 402 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index e0d7d4fa6b..4b5d40bd3a 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -38,6 +38,8 @@ #include "r700_assembler.h" +#define USE_CF_FOR_CONTINUE_BREAK 1 + BITS addrmode_PVSDST(PVSDST * pPVSDST) { return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); @@ -343,6 +345,8 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_MIN: //case SQ_OP2_INST_MAX_DX10: //case SQ_OP2_INST_MIN_DX10: + case SQ_OP2_INST_SETE: + case SQ_OP2_INST_SETNE: case SQ_OP2_INST_SETGT: case SQ_OP2_INST_SETGE: case SQ_OP2_INST_PRED_SETE: @@ -398,6 +402,9 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->number_of_exports = 0; pAsm->number_of_export_opcodes = 0; + pAsm->alu_x_opcode = 0; + + pAsm->D2.bits = 0; pAsm->D.bits = 0; pAsm->S[0].bits = 0; @@ -474,6 +481,22 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->is_tex = GL_FALSE; pAsm->need_tex_barrier = GL_FALSE; + pAsm->subs = NULL; + pAsm->unSubArraySize = 0; + pAsm->unSubArrayPointer = 0; + pAsm->callers = NULL; + pAsm->unCallerArraySize = 0; + pAsm->unCallerArrayPointer = 0; + + pAsm->CALLSP = 0; + pAsm->CALLSTACK[0].FCSP_BeforeEntry; + pAsm->CALLSTACK[0].plstCFInstructions_local + = &(pAsm->pR700Shader->lstCFInstructions); + + SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local); + + pAsm->unCFflags = 0; + return 0; } @@ -592,6 +615,31 @@ int check_current_clause(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean add_cf_instruction(r700_AssemblerBase* pAsm) +{ + if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE)) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr = + (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_cf_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr ); + } + else + { + radeon_error("Could not allocate a new VFetch CF instruction.\n"); + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, R700VertexInstruction* vertex_instruction_ptr) { @@ -1153,6 +1201,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: if (1 == pILInst->SrcReg[src].RelAddr) { setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0); @@ -1179,7 +1228,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, } break; default: - radeon_error("Invalid source argument type\n"); + radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File); return GL_FALSE; } } @@ -1315,7 +1364,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) case FRAG_ATTRIB_TEX0: case FRAG_ATTRIB_TEX1: case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX3: case FRAG_ATTRIB_TEX4: case FRAG_ATTRIB_TEX5: case FRAG_ATTRIB_TEX6: @@ -1335,6 +1384,16 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n"); break; } + + if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + } + break; } } @@ -1517,6 +1576,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, { src_sel = pSource->reg + CFILE_REGISTER_OFFSET; } + else if (pSource->rtype == SRC_REC_LITERAL) + { + src_sel = SQ_ALU_SRC_LITERAL; + } else { radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n", @@ -1606,7 +1669,8 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, return GL_FALSE; } - if ( pAsm->cf_current_alu_clause_ptr == NULL || + if ( pAsm->alu_x_opcode != 0 || + pAsm->cf_current_alu_clause_ptr == NULL || ( (pAsm->cf_current_alu_clause_ptr != NULL) && (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) ) ) ) @@ -1636,9 +1700,17 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0; pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0; - //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1; pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0; - pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + + if(pAsm->alu_x_opcode != 0) + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode; + pAsm->alu_x_opcode = 0; + } + else + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + } pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; @@ -2358,146 +2430,711 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean next_ins(r700_AssemblerBase *pAsm) +GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) { - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + GLuint number_of_scalar_operations; + GLboolean is_single_scalar_operation; + GLuint scalar_channel_index; - if( GL_TRUE == pAsm->is_tex ) + PVSSRC * pcurrent_source; + int current_source_index; + GLuint contiguous_slots_needed; + + GLuint uNumSrc = r700GetNumOperands(pAsm); + + GLboolean bSplitInst = GL_FALSE; + + if (1 == pAsm->D.dst.math) { - if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { - if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) - { - radeon_error("Error assembling TEX instruction\n"); - return GL_FALSE; - } - } else { - if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) ) - { - radeon_error("Error assembling TEX instruction\n"); - return GL_FALSE; - } - } + is_single_scalar_operation = GL_TRUE; + number_of_scalar_operations = 1; } else - { //ALU - if( GL_FALSE == assemble_alu_instruction(pAsm) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - } - - if(pAsm->D.dst.rtype == DST_REG_OUT) { - if(pAsm->D.dst.op3) - { - // There is no mask for OP3 instructions, so all channels are written - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; - } - else - { - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] - |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; - } + is_single_scalar_operation = GL_FALSE; + number_of_scalar_operations = 4; } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; - return GL_TRUE; -} - -GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode) -{ - BITS tmp; - - checkop1(pAsm); - - tmp = gethelpr(pAsm); - - // opcode tmp.x, a.x - // MOV dst, tmp.x - pAsm->D.dst.opcode = opcode; - pAsm->D.dst.math = 1; - - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; + contiguous_slots_needed = 0; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) { - return GL_FALSE; + contiguous_slots_needed = 4; } - if ( GL_FALSE == next_ins(pAsm) ) + initialize(pAsm); + + for (scalar_channel_index=0; + scalar_channel_index < number_of_scalar_operations; + scalar_channel_index++) { - return GL_FALSE; - } + R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + if (alu_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700ALUInstruction(alu_instruction_ptr); + + //src 0 + current_source_index = 0; + pcurrent_source = &(pAsm->S[0].src); - // Now replicate result to all necessary channels in destination - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + + if (uNumSrc > 1) + { + // Process source 1 + current_source_index = 1; + pcurrent_source = &(pAsm->S[current_source_index].src); - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + } - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = DST_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + //other bits + alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; - setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); - noneg_PVSSRC(&(pAsm->S[0].src)); + if( (is_single_scalar_operation == GL_TRUE) + || (GL_TRUE == bSplitInst) ) + { + alu_instruction_ptr->m_Word0.f.last = 1; + } + else + { + alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; + } - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; + if(1 == pAsm->D.dst.predicated) + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + } + + // dst + if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || + (pAsm->D.dst.rtype == DST_REG_OUT) ) + { + alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; + } + else + { + radeon_error("Only temp destination registers supported for ALU dest regs.\n"); + return GL_FALSE; + } - return GL_TRUE; -} + alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype -GLboolean assemble_ABS(r700_AssemblerBase *pAsm) -{ - checkop1(pAsm); + if ( is_single_scalar_operation == GL_TRUE ) + { + // Override scalar_channel_index since only one scalar value will be written + if(pAsm->D.dst.writex) + { + scalar_channel_index = 0; + } + else if(pAsm->D.dst.writey) + { + scalar_channel_index = 1; + } + else if(pAsm->D.dst.writez) + { + scalar_channel_index = 2; + } + else if(pAsm->D.dst.writew) + { + scalar_channel_index = 3; + } + } - pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } - - pAsm->S[1].bits = pAsm->S[0].bits; - flipneg_PVSSRC(&(pAsm->S[1].src)); + alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + if (pAsm->D.dst.op3) + { + //op3 - return GL_TRUE; -} + alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; -GLboolean assemble_ADD(r700_AssemblerBase *pAsm) -{ - if( GL_FALSE == checkop2(pAsm) ) - { - return GL_FALSE; - } + //There's 3rd src for op3 + current_source_index = 2; + pcurrent_source = &(pAsm->S[current_source_index].src); - pAsm->D.dst.opcode = SQ_OP2_INST_ADD; - + if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + } + else + { + //op2 + if (pAsm->bR6xx) + { + alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; + + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + + //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; + //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; + switch (scalar_channel_index) + { + case 0: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; + break; + case 1: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; + break; + case 2: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; + break; + case 3: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; + break; + default: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK; + break; + } + alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; + + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + + //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + switch (scalar_channel_index) + { + case 0: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; + break; + case 1: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; + break; + case 2: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; + break; + case 3: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; + break; + default: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK; + break; + } + alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; + } + } + + if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) + { + return GL_FALSE; + } + + /* + * Judge the type of current instruction, is it vector or scalar + * instruction. + */ + if (is_single_scalar_operation) + { + if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) + { + return GL_FALSE; + } + } + else + { + if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) + { + return 1; + } + } + + contiguous_slots_needed = 0; + } + + return GL_TRUE; +} + +GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) +{ + R700ALUInstruction * alu_instruction_ptr; + R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; + R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; + + GLuint number_of_scalar_operations; + GLboolean is_single_scalar_operation; + GLuint scalar_channel_index; + + GLuint contiguous_slots_needed; + GLuint lastInstruction; + GLuint not_masked[4]; + + GLuint uNumSrc = r700GetNumOperands(pAsm); + + GLboolean bSplitInst = GL_FALSE; + + number_of_scalar_operations = 0; + contiguous_slots_needed = 0; + + if(1 == pAsm->D.dst.writew) + { + lastInstruction = 3; + number_of_scalar_operations++; + not_masked[3] = 1; + } + else + { + not_masked[3] = 0; + } + if(1 == pAsm->D.dst.writez) + { + lastInstruction = 2; + number_of_scalar_operations++; + not_masked[2] = 1; + } + else + { + not_masked[2] = 0; + } + if(1 == pAsm->D.dst.writey) + { + lastInstruction = 1; + number_of_scalar_operations++; + not_masked[1] = 1; + } + else + { + not_masked[1] = 0; + } + if(1 == pAsm->D.dst.writex) + { + lastInstruction = 0; + number_of_scalar_operations++; + not_masked[0] = 1; + } + else + { + not_masked[0] = 0; + } + + if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + { + contiguous_slots_needed = 4; + } + else + { + contiguous_slots_needed = number_of_scalar_operations; + } + + if(1 == pAsm->D2.dst2.literal) + { + contiguous_slots_needed += 1; + } + else if(2 == pAsm->D2.dst2.literal) + { + contiguous_slots_needed += 2; + } + + initialize(pAsm); + + for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++) + { + if(0 == not_masked[scalar_channel_index]) + { + continue; + } + + if(scalar_channel_index == lastInstruction) + { + switch (pAsm->D2.dst2.literal) + { + case 0: + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + if (alu_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700ALUInstruction(alu_instruction_ptr); + break; + case 1: + alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); + if (alu_instruction_ptr_hl == NULL) + { + return GL_FALSE; + } + Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; + break; + case 2: + alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); + if (alu_instruction_ptr_fl == NULL) + { + return GL_FALSE; + } + Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; + break; + default: + break; + }; + } + else + { + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + if (alu_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700ALUInstruction(alu_instruction_ptr); + } + + //src 0 + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + 0, + &(pAsm->S[0].src), + scalar_channel_index) ) + { + return GL_FALSE; + } + + if (uNumSrc > 1) + { + // Process source 1 + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + 1, + &(pAsm->S[1].src), + scalar_channel_index) ) + { + return GL_FALSE; + } + } + + //other bits + alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; + + if(scalar_channel_index == lastInstruction) + { + alu_instruction_ptr->m_Word0.f.last = 1; + } + + alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; + if(1 == pAsm->D.dst.predicated) + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0; + } + + // dst + if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || + (pAsm->D.dst.rtype == DST_REG_OUT) ) + { + alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; + } + else + { + radeon_error("Only temp destination registers supported for ALU dest regs.\n"); + return GL_FALSE; + } + + alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype + + alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; + + alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; + + if (pAsm->D.dst.op3) + { + //op3 + alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; + + //There's 3rd src for op3 + if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, + 2, + &(pAsm->S[2].src), + scalar_channel_index) ) + { + return GL_FALSE; + } + } + else + { + //op2 + if (pAsm->bR6xx) + { + alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; + alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; + alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; + } + } + + if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) + { + return GL_FALSE; + } + + if (1 == number_of_scalar_operations) + { + if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) + { + return GL_FALSE; + } + } + else + { + if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) + { + return GL_FALSE; + } + } + + contiguous_slots_needed -= 2; + } + + return GL_TRUE; +} + +GLboolean next_ins(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if( GL_TRUE == pAsm->is_tex ) + { + if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) + { + radeon_error("Error assembling TEX instruction\n"); + return GL_FALSE; + } + } else { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) ) + { + radeon_error("Error assembling TEX instruction\n"); + return GL_FALSE; + } + } + } + else + { //ALU + if( GL_FALSE == assemble_alu_instruction(pAsm) ) + { + radeon_error("Error assembling ALU instruction\n"); + return GL_FALSE; + } + } + + if(pAsm->D.dst.rtype == DST_REG_OUT) + { + if(pAsm->D.dst.op3) + { + // There is no mask for OP3 instructions, so all channels are written + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; + } + else + { + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] + |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; + } + } + + //reset for next inst. + pAsm->D.bits = 0; + pAsm->D2.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + + return GL_TRUE; +} + +GLboolean next_ins2(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + //ALU + if( GL_FALSE == assemble_alu_instruction2(pAsm) ) + { + radeon_error("Error assembling ALU instruction\n"); + return GL_FALSE; + } + + if(pAsm->D.dst.rtype == DST_REG_OUT) + { + if(pAsm->D.dst.op3) + { + // There is no mask for OP3 instructions, so all channels are written + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; + } + else + { + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] + |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; + } + } + + //reset for next inst. + pAsm->D.bits = 0; + pAsm->D2.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + + //richard nov.16 glsl + pAsm->D2.bits = 0; + + return GL_TRUE; +} + +/* not work yet */ +GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + //ALU + if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) ) + { + radeon_error("Error assembling ALU instruction\n"); + return GL_FALSE; + } + + //reset for next inst. + pAsm->D.bits = 0; + pAsm->D2.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + return GL_TRUE; +} + +GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // opcode tmp.x, a.x + // MOV dst, tmp.x + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Now replicate result to all necessary channels in destination + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_ABS(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_ADD(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + if( GL_FALSE == assemble_dst(pAsm) ) { return GL_FALSE; @@ -3809,10 +4446,78 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) pAsm->S[0].src.swizzlez = SQ_SEL_0; pAsm->S[0].src.swizzlew = SQ_SEL_0; - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + pAsm->D.dst.predicated = 1; + pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins2(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } @@ -4077,223 +4782,930 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp2; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 - noswizzle_PVSSRC(&(pAsm->S[1].src)); + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 + noswizzle_PVSSRC(&(pAsm->S[1].src)); + + next_ins(pAsm); + + /* tmp1.xy = temp2.xy */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + pAsm->aArgSubst[1] = tmp1; + need_barrier = GL_TRUE; + + } + + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + { + pAsm->need_tex_barrier = GL_TRUE; + } + // Set src1 to tex unit id + pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + + //No sw info from mesa compiler, so hard code here. + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_Y; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == tex_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == tex_src(pAsm) ) + { + return GL_FALSE; + } + + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) + { + /* hopefully did swizzles before */ + noswizzle_PVSSRC(&(pAsm->S[0].src)); + } + + if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX) + { + /* SAMPLE dst, tmp.yxwy, CUBE */ + pAsm->S[0].src.swizzlex = SQ_SEL_Y; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_W; + pAsm->S[0].src.swizzlew = SQ_SEL_Y; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_XPD(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + tmp = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); + + // result1 + (neg) result0 + setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp; + + neg_PVSSRC(&(pAsm->S[2].src)); + noswizzle_PVSSRC(&(pAsm->S[2].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + // Use tmp as source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset; + + return GL_TRUE; +} + +GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + return GL_TRUE; +} + +GLboolean assemble_IF(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + if(GL_TRUE != bHasElse) + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + } + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_IF; + pAsm->fc_stack[pAsm->FCSP].bpush = 0; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].midLen= 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + + if(GL_TRUE != bHasElse) + { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; + } + + pAsm->branch_depth++; + + if(pAsm->branch_depth > pAsm->max_branch_depth) + { + pAsm->max_branch_depth = pAsm->branch_depth; + } + return GL_TRUE; +} + +GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; /// + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - next_ins(pAsm); + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - /* tmp1.xy = temp2.xy */ - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp1; - pAsm->D.dst.writex = 1; - pAsm->D.dst.writey = 1; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp2; - noswizzle_PVSSRC(&(pAsm->S[0].src)); + pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid, + 0, + sizeof(R700ControlFlowGenericClause *) ); + pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr; + //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1; - next_ins(pAsm); - pAsm->aArgSubst[1] = tmp1; - need_barrier = GL_TRUE; + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; + + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1; + + return GL_TRUE; +} + +GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) +{ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + if(NULL == pAsm->fc_stack[pAsm->FCSP].mid) + { + /* no else in between */ + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; + } + else + { + pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; } - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; - pAsm->is_tex = GL_TRUE; - if ( GL_TRUE == need_barrier ) + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) { - pAsm->need_tex_barrier = GL_TRUE; + FREE(pAsm->fc_stack[pAsm->FCSP].mid); } - // Set src1 to tex unit id - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - //No sw info from mesa compiler, so hard code here. - pAsm->S[1].src.swizzlex = SQ_SEL_X; - pAsm->S[1].src.swizzley = SQ_SEL_Y; - pAsm->S[1].src.swizzlez = SQ_SEL_Z; - pAsm->S[1].src.swizzlew = SQ_SEL_W; + if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF) + { + radeon_error("if/endif in shader code are not paired. \n"); + return GL_FALSE; + } + pAsm->branch_depth--; + pAsm->FCSP--; - if( GL_FALSE == tex_dst(pAsm) ) + return GL_TRUE; +} + +GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; } - if( GL_FALSE == tex_src(pAsm) ) + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP; + pAsm->fc_stack[pAsm->FCSP].bpush = 1; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].unNumMid = 0; + pAsm->fc_stack[pAsm->FCSP].midLen = 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + + pAsm->branch_depth++; + + if(pAsm->branch_depth > pAsm->max_branch_depth) + { + pAsm->max_branch_depth = pAsm->branch_depth; + } + return GL_TRUE; +} + +GLboolean assemble_BRK(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_CONTINUE_BREAK + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) { + radeon_error("Break is not inside loop/endloop pair.\n"); return GL_FALSE; } - if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) + if(GL_FALSE == add_cf_instruction(pAsm) ) { - /* hopefully did swizzles before */ - noswizzle_PVSSRC(&(pAsm->S[0].src)); + return GL_FALSE; } - - if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX) + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) { - /* SAMPLE dst, tmp.yxwy, CUBE */ - pAsm->S[0].src.swizzlex = SQ_SEL_Y; - pAsm->S[0].src.swizzley = SQ_SEL_X; - pAsm->S[0].src.swizzlez = SQ_SEL_W; - pAsm->S[0].src.swizzlew = SQ_SEL_Y; + return GL_FALSE; } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; +#endif //USE_CF_FOR_CONTINUE_BREAK return GL_TRUE; } -GLboolean assemble_XPD(r700_AssemblerBase *pAsm) +GLboolean assemble_CONT(r700_AssemblerBase *pAsm) { - BITS tmp; +#ifdef USE_CF_FOR_CONTINUE_BREAK + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) + { + radeon_error("Continue is not inside loop/endloop pair.\n"); + return GL_FALSE; + } - if( GL_FALSE == checkop2(pAsm) ) + if(GL_FALSE == add_cf_instruction(pAsm) ) { - return GL_FALSE; + return GL_FALSE; } - tmp = gethelpr(pAsm); + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; - pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - nomask_PVSDST(&(pAsm->D.dst)); - - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; } - if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + +#endif /* USE_CF_FOR_CONTINUE_BREAK */ + + return GL_TRUE; +} + +GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm) +{ + GLuint i; + + if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; } - - swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); - swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); - if( GL_FALSE == next_ins(pAsm) ) + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1; + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + +#ifdef USE_CF_FOR_CONTINUE_BREAK + for(i=0; ifc_stack[pAsm->FCSP].unNumMid; i++) + { + pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex; + } + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) + { + FREE(pAsm->fc_stack[pAsm->FCSP].mid); + } +#endif + + if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP) { + radeon_error("loop/endloop in shader code are not paired. \n"); return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; - pAsm->D.dst.op3 = 1; + unsigned int unFCSP = 0; + if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0) + { + for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry) + { + unFCSP = 0; - if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) - { - tmp = gethelpr(pAsm); + returnOnFlag(pAsm); + pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET; + } + } - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; + pAsm->branch_depth--; + pAsm->FCSP--; - nomask_PVSDST(&(pAsm->D.dst)); + if(unFCSP > 0) + { + breakLoopOnFlag(pAsm, unFCSP); } - else + + return GL_TRUE; +} + +void add_return_inst(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) { - if( GL_FALSE == assemble_dst(pAsm) ) + return GL_FALSE; + } + //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; +} + +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) +{ + /* Put in sub */ + if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize ) + { + pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs, + sizeof(SUB_OFFSET) * pAsm->unSubArraySize, + sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) ); + if(NULL == pAsm->subs) { return GL_FALSE; } + pAsm->unSubArraySize += 10; } - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; + + pAsm->CALLSP++; + pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP; + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local + = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local); + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->unSubArrayPointer++; + + /* start sub */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + return GL_TRUE; +} + +GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm) +{ + pAsm->CALLSP--; + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + return GL_TRUE; +} + +GLboolean assemble_RET(r700_AssemblerBase *pAsm) +{ + if(pAsm->CALLSP > 0) + { /* in sub */ + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + setRetInLoopFlag(pAsm, SQ_SEL_1); + breakLoopOnFlag(pAsm, unFCSP); + pAsm->unCFflags |= LOOPRET_FLAGS; + + return GL_TRUE; + } + } + } + + add_return_inst(pAsm); + + return GL_TRUE; +} + +GLboolean assemble_CAL(r700_AssemblerBase *pAsm, + GLint nILindex, + GLuint uiNumberInsts, + struct prog_instruction *pILInst) +{ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; } - if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + /* Put in caller */ + if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize ) { - return GL_FALSE; + pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers, + sizeof(CALLER_POINTER) * pAsm->unCallerArraySize, + sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) ); + if(NULL == pAsm->callers) + { + return GL_FALSE; + } + pAsm->unCallerArraySize += 10; } - - swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); - swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); + + pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex; + pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; - // result1 + (neg) result0 - setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE); - pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[2].src.reg = tmp; + pAsm->unCallerArrayPointer++; - neg_PVSSRC(&(pAsm->S[2].src)); - noswizzle_PVSSRC(&(pAsm->S[2].src)); + int j; + for(j=0; junSubArrayPointer; j++) + { + if(nILindex == pAsm->subs[j].subIL_Offset) + { /* compiled before */ + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j; + return GL_TRUE; + } + } + + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; + + return AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm); +} + +GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) +{ + GLfloat fLiteral[2] = {0.1, 0.0}; + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + pAsm->D.dst.op3 = 0; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->flag_reg_index; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 0; +#if 0 + pAsm->S[0].src.rtype = SRC_REC_LITERAL; + //pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = flagValue; + pAsm->S[0].src.swizzley = flagValue; + pAsm->S[0].src.swizzlez = flagValue; + pAsm->S[0].src.swizzlew = flagValue; - if( GL_FALSE == next_ins(pAsm) ) + if( GL_FALSE == next_ins2(pAsm) ) { return GL_FALSE; } +#endif + return GL_TRUE; +} - if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) - { - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } +GLboolean testFlag(r700_AssemblerBase *pAsm) +{ + GLfloat fLiteral[2] = {0.1, 0.0}; - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + //Test flag + GLuint tmp = gethelpr(pAsm); + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - // Use tmp as source - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 1; - noneg_PVSSRC(&(pAsm->S[0].src)); - noswizzle_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->flag_reg_index; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; +#if 0 + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + //pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_Y; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_W; - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[1].src.rtype = DST_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_1; + pAsm->S[1].src.swizzley = SQ_SEL_1; + pAsm->S[1].src.swizzlez = SQ_SEL_1; + pAsm->S[1].src.swizzlew = SQ_SEL_1; + + if( GL_FALSE == next_ins2(pAsm) ) + { + return GL_FALSE; } +#endif return GL_TRUE; } -GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) +GLboolean returnOnFlag(r700_AssemblerBase *pAsm) { - return GL_TRUE; -} + testFlag(pAsm); + jumpToOffest(pAsm, 1, 4); + setRetInLoopFlag(pAsm, SQ_SEL_0); + pops(pAsm, 1); + add_return_inst(pAsm); -GLboolean assemble_IF(r700_AssemblerBase *pAsm) -{ return GL_TRUE; } -GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) +GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) { + testFlag(pAsm); + + //break + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + pops(pAsm, 1); + return GL_TRUE; } -GLboolean AssembleInstr(GLuint uiNumberInsts, +GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode) { GLuint i; pR700AsmCode->pILInst = pILInst; - for(i=0; iuiCurInst = i; +#ifndef USE_CF_FOR_CONTINUE_BREAK + if(OPCODE_BRK == pILInst[i+1].Opcode) + { + switch(pILInst[i].Opcode) + { + case OPCODE_SLE: + pILInst[i].Opcode = OPCODE_SGT; + break; + case OPCODE_SLT: + pILInst[i].Opcode = OPCODE_SGE; + break; + case OPCODE_SGE: + pILInst[i].Opcode = OPCODE_SLT; + break; + case OPCODE_SGT: + pILInst[i].Opcode = OPCODE_SLE; + break; + case OPCODE_SEQ: + pILInst[i].Opcode = OPCODE_SNE; + break; + case OPCODE_SNE: + pILInst[i].Opcode = OPCODE_SEQ; + break; + default: + break; + } + } +#endif + switch (pILInst[i].Opcode) { case OPCODE_ABS: @@ -4337,101 +5749,383 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; - case OPCODE_EX2: - if ( GL_FALSE == assemble_EX2(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_EXP: - if ( GL_FALSE == assemble_EXP(pR700AsmCode) ) - return GL_FALSE; + case OPCODE_EX2: + if ( GL_FALSE == assemble_EX2(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_EXP: + if ( GL_FALSE == assemble_EXP(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_FLR: + if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) + return GL_FALSE; + break; + //case OP_FLR_INT: + // if ( GL_FALSE == assemble_FLR_INT() ) + // return GL_FALSE; + // break; + + case OPCODE_FRC: + if ( GL_FALSE == assemble_FRC(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_KIL: + if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LG2: + if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LIT: + if ( GL_FALSE == assemble_LIT(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LRP: + if ( GL_FALSE == assemble_LRP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LOG: + if ( GL_FALSE == assemble_LOG(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_MAD: + if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MAX: + if ( GL_FALSE == assemble_MAX(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MIN: + if ( GL_FALSE == assemble_MIN(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_MOV: + if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MUL: + if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_POW: + if ( GL_FALSE == assemble_POW(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_RCP: + if ( GL_FALSE == assemble_RCP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_RSQ: + if ( GL_FALSE == assemble_RSQ(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_SIN: + if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_SCS: + if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_SEQ: + if(OPCODE_IF == pILInst[i+1].Opcode) + { + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) + { + return GL_FALSE; + } + } + else if(OPCODE_BRK == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; +#endif + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) + { + return GL_FALSE; + } + } + else if(OPCODE_CONT == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; +#endif + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) + { + return GL_FALSE; + } + } + else + { + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) + { + return GL_FALSE; + } + } + break; + + case OPCODE_SGT: + if(OPCODE_IF == pILInst[i+1].Opcode) + { + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) + { + return GL_FALSE; + } + } + else if(OPCODE_BRK == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; +#endif + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) + { + return GL_FALSE; + } + } + else if(OPCODE_CONT == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; +#endif + + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) + { + return GL_FALSE; + } + } + else + { + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + return GL_FALSE; + } + } + break; + + case OPCODE_SGE: + if(OPCODE_IF == pILInst[i+1].Opcode) + { + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) + { + return GL_FALSE; + } + } + else if(OPCODE_BRK == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; +#endif + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) + { + return GL_FALSE; + } + } + else if(OPCODE_CONT == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; +#endif + + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) + { + return GL_FALSE; + } + } + else + { + if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + { + return GL_FALSE; + } + } + break; + + /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/ + case OPCODE_SLT: + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if(OPCODE_IF == pILInst[i+1].Opcode) + { + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + } + else if(OPCODE_BRK == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; +#endif + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + } + else if(OPCODE_CONT == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; +#endif + + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + } + else + { + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } break; - case OPCODE_FLR: - if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) - return GL_FALSE; - break; - //case OP_FLR_INT: - // if ( GL_FALSE == assemble_FLR_INT() ) - // return GL_FALSE; - // break; - - case OPCODE_FRC: - if ( GL_FALSE == assemble_FRC(pR700AsmCode) ) - return GL_FALSE; - break; + case OPCODE_SLE: + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if(OPCODE_IF == pILInst[i+1].Opcode) + { + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + } + else if(OPCODE_BRK == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; +#endif + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + } + else if(OPCODE_CONT == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; +#endif - case OPCODE_KIL: - if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_LG2: - if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_LIT: - if ( GL_FALSE == assemble_LIT(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_LRP: - if ( GL_FALSE == assemble_LRP(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_LOG: - if ( GL_FALSE == assemble_LOG(pR700AsmCode) ) - return GL_FALSE; + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + } + else + { + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } break; - case OPCODE_MAD: - if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_MAX: - if ( GL_FALSE == assemble_MAX(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_MIN: - if ( GL_FALSE == assemble_MIN(pR700AsmCode) ) - return GL_FALSE; - break; - - case OPCODE_MOV: - if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_MUL: - if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) - return GL_FALSE; - break; - - case OPCODE_POW: - if ( GL_FALSE == assemble_POW(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_RCP: - if ( GL_FALSE == assemble_RCP(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_RSQ: - if ( GL_FALSE == assemble_RSQ(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_SIN: - if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_SCS: - if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) - return GL_FALSE; - break; - - case OPCODE_SGE: - if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) - return GL_FALSE; - break; - case OPCODE_SLT: - if ( GL_FALSE == assemble_SLT(pR700AsmCode) ) - return GL_FALSE; - break; + case OPCODE_SNE: + if(OPCODE_IF == pILInst[i+1].Opcode) + { + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) + { + return GL_FALSE; + } + } + else if(OPCODE_BRK == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; +#endif + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) + { + return GL_FALSE; + } + } + else if(OPCODE_CONT == pILInst[i+1].Opcode) + { +#ifdef USE_CF_FOR_CONTINUE_BREAK + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; +#else + pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; +#endif + if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) + { + return GL_FALSE; + } + } + else + { + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) + { + return GL_FALSE; + } + } + break; //case OP_STP: // if ( GL_FALSE == assemble_STP(pR700AsmCode) ) @@ -4471,24 +6165,91 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_IF : - if ( GL_FALSE == assemble_IF(pR700AsmCode) ) - return GL_FALSE; + { + GLboolean bHasElse = GL_FALSE; + + if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE) + { + bHasElse = GL_TRUE; + } + + if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) ) + { + return GL_FALSE; + } + } break; + case OPCODE_ELSE : - radeon_error("Not yet implemented instruction OPCODE_ELSE \n"); - //if ( GL_FALSE == assemble_BAD("ELSE") ) + if ( GL_FALSE == assemble_ELSE(pR700AsmCode) ) return GL_FALSE; break; + case OPCODE_ENDIF: if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) return GL_FALSE; break; + case OPCODE_BGNLOOP: + if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BRK: + if( GL_FALSE == assemble_BRK(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CONT: + if( GL_FALSE == assemble_CONT(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_ENDLOOP: + if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BGNSUB: + if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) ) + { + return GL_FALSE; + } + break; + + case OPCODE_RET: + if( GL_FALSE == assemble_RET(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CAL: + if( GL_FALSE == assemble_CAL(pR700AsmCode, + pILInst[i].BranchTarget, + uiNumberInsts, + pILInst) ) + { + return GL_FALSE; + } + break; + //case OPCODE_EXPORT: // if ( GL_FALSE == assemble_EXPORT() ) // return GL_FALSE; // break; + case OPCODE_ENDSUB: + return assemble_ENDSUB(pR700AsmCode); + case OPCODE_END: //pR700AsmCode->uiCurInst = i; //This is to remaind that if in later exoort there is depth/stencil @@ -4505,6 +6266,116 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_TRUE; } +GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) +{ + setRetInLoopFlag(pAsm, SQ_SEL_0); + return GL_TRUE; +} + +GLboolean RelocProgram(r700_AssemblerBase * pAsm) +{ + GLuint i; + GLuint unCFoffset; + TypedShaderList * plstCFmain; + TypedShaderList * plstCFsub; + + R700ShaderInstruction * pInst; + R700ControlFlowGenericClause * pCFInst; + + if(0 == pAsm->unSubArrayPointer) + { + return GL_TRUE; + } + + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; + unCFoffset = plstCFmain->uNumOfNode; + + /* Reloc subs */ + for(i=0; iunSubArrayPointer; i++) + { + pAsm->subs[i].unCFoffset = unCFoffset; + plstCFsub = &(pAsm->subs[i].lstCFInstructions_local); + + pInst = plstCFsub->pHead; + + /* reloc instructions */ + while(pInst) + { + if(SIT_CF_GENERIC == pInst->m_ShaderInstType) + { + pCFInst = (R700ControlFlowGenericClause *)pInst; + + switch (pCFInst->m_Word1.f.cf_inst) + { + case SQ_CF_INST_POP: + case SQ_CF_INST_JUMP: + case SQ_CF_INST_ELSE: + case SQ_CF_INST_LOOP_END: + case SQ_CF_INST_LOOP_START: + case SQ_CF_INST_LOOP_START_NO_AL: + case SQ_CF_INST_LOOP_CONTINUE: + case SQ_CF_INST_LOOP_BREAK: + pCFInst->m_Word0.f.addr += unCFoffset; + break; + default: + break; + } + } + + pInst->m_uIndex += unCFoffset; + + pInst = pInst->pNextInst; + }; + + /* Put sub into main */ + plstCFmain->pTail->pNextInst = plstCFsub->pHead; + plstCFmain->pTail = plstCFsub->pTail; + plstCFmain->uNumOfNode += plstCFsub->uNumOfNode; + + unCFoffset += plstCFsub->uNumOfNode; + } + + /* reloc callers */ + for(i=0; iunCallerArrayPointer; i++) + { + pAsm->callers[i].cf_ptr->m_Word0.f.addr + = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; + } + + /* remove flags init if they are not used */ + if((pAsm->unCFflags & HAS_LOOPRET) == 0) + { + R700ControlFlowALUClause * pCF_ALU; + pInst = plstCFmain->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + if(1 == pCF_ALU->m_Word1.f.count) + { + pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; + } + else + { + R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction; + + pALU->m_pLinkedALUClause = NULL; + pALU = (R700ALUInstruction *)(pALU->pNextInst); + pALU->m_pLinkedALUClause = pCF_ALU; + pCF_ALU->m_pLinkedALUInstruction = pALU; + + pCF_ALU->m_Word1.f.count--; + } + break; + } + pInst = pInst->pNextInst; + }; + } + + return GL_TRUE; +} + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -4800,6 +6671,25 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, } } + for(i=VERT_RESULT_VAR0; iucVP_OutputMap[i], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + } + // At least one param should be exported if (export_count) { @@ -4833,6 +6723,16 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) { FREE(pR700AsmCode->pucOutMask); FREE(pR700AsmCode->pInstDeps); + + if(NULL != pR700AsmCode->subs) + { + FREE(pR700AsmCode->subs); + } + if(NULL != pR700AsmCode->callers) + { + FREE(pR700AsmCode->callers); + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index c66db502a1..85d32212c0 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -72,7 +72,8 @@ typedef enum SrcRegisterType SRC_REG_INPUT = 1, SRC_REG_CONSTANT = 2, SRC_REG_ALT_TEMPORARY = 3, - NUMBER_OF_SRC_REG_TYPE = 4 + SRC_REC_LITERAL = 4, + NUMBER_OF_SRC_REG_TYPE = 5 } SrcRegisterType; typedef enum DstRegisterType @@ -111,6 +112,12 @@ typedef struct PVSDSTtag BITS addrmode1:1; //32 } PVSDST; +typedef struct PVSINSTtag +{ + BITS literal :2; + BITS SaturateMode :2; +} PVSINST; + typedef struct PVSSRCtag { BITS rtype:4; @@ -148,6 +155,7 @@ typedef union PVSDWORDtag { BITS bits; PVSDST dst; + PVSINST dst2; PVSSRC src; PVSMATH math; float f; @@ -263,14 +271,15 @@ enum typedef struct FC_LEVEL { - unsigned int first; ///< first fc instruction on level (if, rep, loop) - unsigned int* mid; ///< middle instructions - else or all breaks on this level - unsigned int midLen; - unsigned int type; - unsigned int cond; - unsigned int inv; - unsigned int bpush; ///< 1 if first instruction does branch stack push - int id; ///< id of bool or int variable + R700ControlFlowGenericClause * first; + R700ControlFlowGenericClause ** mid; + unsigned int unNumMid; + unsigned int midLen; + unsigned int type; + unsigned int cond; + unsigned int inv; + unsigned int bpush; ///< 1 if first instruction does branch stack push + int id; ///< id of bool or int variable } FC_LEVEL; typedef struct VTX_FETCH_METHOD @@ -279,6 +288,28 @@ typedef struct VTX_FETCH_METHOD GLuint mega_fetch_remainder; } VTX_FETCH_METHOD; +typedef struct SUB_OFFSET +{ + GLint subIL_Offset; + GLuint unCFoffset; + TypedShaderList lstCFInstructions_local; +} SUB_OFFSET; + +typedef struct CALLER_POINTER +{ + GLint subIL_Offset; + GLint subDescIndex; + R700ControlFlowGenericClause* cf_ptr; +} CALLER_POINTER; + +#define SQ_MAX_CALL_DEPTH 0x00000020 + +typedef struct CALL_LEVEL +{ + unsigned int FCSP_BeforeEntry; + TypedShaderList * plstCFInstructions_local; +} CALL_LEVEL; + typedef struct r700_AssemblerBase { R700ControlFlowSXClause* cf_last_export_ptr; @@ -294,11 +325,14 @@ typedef struct r700_AssemblerBase // No clause has been created yet CF_CLAUSE_TYPE cf_current_clause_type; + BITS alu_x_opcode; + GLuint number_of_exports; GLuint number_of_colorandz_exports; GLuint number_of_export_opcodes; PVSDWORD D; + PVSDWORD D2; PVSDWORD S[3]; unsigned int uLastPosUpdate; @@ -310,6 +344,8 @@ typedef struct r700_AssemblerBase unsigned int number_used_registers; unsigned int uUsedConsts; + unsigned int flag_reg_index; + // Fragment programs unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX]; unsigned int uiFP_OutputMap[FRAG_RESULT_MAX]; @@ -378,6 +414,18 @@ typedef struct r700_AssemblerBase GLboolean is_tex; /* we inserted helper intructions and need barrier on next TEX ins */ GLboolean need_tex_barrier; + + SUB_OFFSET * subs; + GLuint unSubArraySize; + GLuint unSubArrayPointer; + CALLER_POINTER * callers; + GLuint unCallerArraySize; + GLuint unCallerArrayPointer; + unsigned int CALLSP; + CALL_LEVEL CALLSTACK[SQ_MAX_CALL_DEPTH]; + + GLuint unCFflags; + } r700_AssemblerBase; //Internal use @@ -446,6 +494,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr, GLuint contiguous_slots_needed); + +GLboolean add_cf_instruction(r700_AssemblerBase* pAsm); +void add_return_inst(r700_AssemblerBase *pAsm); + void get_src_properties(R700ALUInstruction* alu_instruction_ptr, int source_index, BITS* psrc_sel, @@ -467,6 +519,21 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr); GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); GLboolean next_ins(r700_AssemblerBase *pAsm); + +GLboolean next_ins2(r700_AssemblerBase *pAsm); +GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm); + +/* TODO : merge next_ins/2/literal, assemble_alu_instruction/2/literal */ +GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); +GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); + +GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops); +GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset); +GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue); +GLboolean testFlag(r700_AssemblerBase *pAsm); +GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP); +GLboolean returnOnFlag(r700_AssemblerBase *pAsm); + GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); GLboolean assemble_ABS(r700_AssemblerBase *pAsm); GLboolean assemble_ADD(r700_AssemblerBase *pAsm); @@ -497,14 +564,32 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); GLboolean assemble_SIN(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); + +GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); + GLboolean assemble_SLT(r700_AssemblerBase *pAsm); GLboolean assemble_STP(r700_AssemblerBase *pAsm); GLboolean assemble_TEX(r700_AssemblerBase *pAsm); GLboolean assemble_XPD(r700_AssemblerBase *pAsm); GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm); -GLboolean assemble_IF(r700_AssemblerBase *pAsm); +GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse); +GLboolean assemble_ELSE(r700_AssemblerBase *pAsm); GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm); +GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm); +GLboolean assemble_BRK(r700_AssemblerBase *pAsm); +GLboolean assemble_COND(r700_AssemblerBase *pAsm); +GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm); + +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex); +GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm); +GLboolean assemble_RET(r700_AssemblerBase *pAsm); +GLboolean assemble_CAL(r700_AssemblerBase *pAsm, + GLint nILindex, + GLuint uiNumberInsts, + struct prog_instruction *pILInst); + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -516,12 +601,16 @@ GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, //Interface -GLboolean AssembleInstr(GLuint uiNumberInsts, +GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode); GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); +GLboolean RelocProgram(r700_AssemblerBase * pAsm); +GLboolean InitShaderProgram(r700_AssemblerBase * pAsm); + int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index ec76fbcb6d..197916ac0d 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -442,68 +442,77 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_bo * pbo; - BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + struct radeon_bo * pbo; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); + pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); - if (!pbo) - return; + if (!pbo) + return; - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3 + 2); - R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); - R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); - R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, - pbo, - r700->ps.SQ_PGM_START_PS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); + R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); + R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, + pbo, + r700->ps.SQ_PGM_START_PS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(9); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(9); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All); + END_BATCH(); - COMMIT_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF); + END_BATCH(); + + COMMIT_BATCH(); } static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_bo * pbo; - BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + struct radeon_bo * pbo; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); + pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); - if (!pbo) - return; + if (!pbo) + return; - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3 + 2); - R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); - R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); - R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, - pbo, - r700->vs.SQ_PGM_START_VS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); + R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); + R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, + pbo, + r700->vs.SQ_PGM_START_VS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(6); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All); + END_BATCH(); - COMMIT_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + 32*4), 0x0100000F); + //R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F); + END_BATCH(); + + COMMIT_BATCH(); } static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom) diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index ccafd433bf..21ac46e7b8 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -73,11 +73,11 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_FOGC; - if(mesa_fp->Base.InputsRead & unBit) - { - pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; - } + unBit = 1 << FRAG_ATTRIB_FOGC; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; + } for(i=0; i<8; i++) { @@ -88,6 +88,62 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, } } +/* order has been taken care of */ +#if 1 + for(i=FRAG_ATTRIB_VAR0; iBase.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers++; + } + } +#else + if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 ) + { + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying; + struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying; + struct gl_program_parameter * pVsParam; + struct gl_program_parameter * pPsParam; + GLuint j, k; + GLuint unMaxVarying = 0; + + for(i=0; iNumParameters; i++) + { + pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0; + } + + for(i=FRAG_ATTRIB_VAR0; iBase.InputsRead & unBit) + { + j = i - FRAG_ATTRIB_VAR0; + pPsParam = PsVarying->Parameters + j; + + for(k=0; kNumParameters; k++) + { + pVsParam = VsVarying->Parameters + k; + + if( strcmp(pPsParam->Name, pVsParam->Name) == 0) + { + pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k; + if(k > unMaxVarying) + { + unMaxVarying = k; + } + break; + } + } + } + } + + pAsm->number_used_registers += unMaxVarying + 1; + } +#endif + /* Map temporary registers (GPRs) */ pAsm->starting_temp_register_number = pAsm->number_used_registers; @@ -127,6 +183,8 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->pucOutMask[ui] = 0x0; } + pAsm->flag_reg_index = pAsm->number_used_registers++; + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -247,8 +305,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, { return GL_FALSE; } + + InitShaderProgram(&(fp->r700AsmCode)); - if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions, + if( GL_FALSE == AssembleInstr(0, + mesa_fp->Base.NumInstructions, &(mesa_fp->Base.Instructions[0]), &(fp->r700AsmCode)) ) { @@ -260,6 +321,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, return GL_FALSE; } + if( GL_FALSE == RelocProgram(&(fp->r700AsmCode)) ) + { + return GL_FALSE; + } + fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0 : (fp->r700AsmCode.number_used_registers - 1); @@ -459,6 +525,22 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } } + for(i=FRAG_ATTRIB_VAR0; iBase.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[i]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + } + exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1)) { diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index 955ea4e4e1..2eed1acc2f 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -159,13 +159,18 @@ void Init_R700_Shader(R700_Shader * pShader) pShader->lstVTXInstructions.uNumOfNode=0; } +void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF) +{ + pShader->plstCFInstructions_active = plstCF; +} + void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst) { R700ControlFlowSXClause* pSXClause; R700ControlFlowSMXClause* pSMXClause; - pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode; - AddInstToList(&(pShader->lstCFInstructions), + pCFInst->m_uIndex = pShader->plstCFInstructions_active->uNumOfNode; + AddInstToList(pShader->plstCFInstructions_active, (R700ShaderInstruction*)pCFInst); pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType); diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h index c6a058617e..0599ffd901 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.h +++ b/src/mesa/drivers/dri/r600/r700_shader.h @@ -109,6 +109,7 @@ typedef struct R700_Shader GLuint uStackSize; GLuint uMaxCallDepth; + TypedShaderList * plstCFInstructions_active; TypedShaderList lstCFInstructions; TypedShaderList lstALUInstructions; TypedShaderList lstTEXInstructions; @@ -132,13 +133,13 @@ void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruc void ResolveLinks(R700_Shader *pShader); void Assemble(R700_Shader *pShader); - //Interface void Init_R700_Shader(R700_Shader * pShader); void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst); void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst); void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst); void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst); +void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF); void LoadProgram(R700_Shader *pShader); void UpdateShaderRegisters(R700_Shader *pShader); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index ffc6068bd8..c8f72d588b 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -111,6 +111,15 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, } } + for(i=VERT_RESULT_VAR0; iBase.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[i] = unTotal++; + } + } + return (unTotal - unStart); } @@ -235,6 +244,8 @@ void Map_Vertex_Program(GLcontext *ctx, pAsm->number_used_registers += mesa_vp->Base.NumTemporaries; } + pAsm->flag_reg_index = pAsm->number_used_registers++; + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -324,7 +335,10 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return NULL; } - if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions, + InitShaderProgram(&(vp->r700AsmCode)); + + if(GL_FALSE == AssembleInstr(0, + vp->mesa_program->Base.NumInstructions, &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) { @@ -336,6 +350,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return NULL; } + if( GL_FALSE == RelocProgram(&(vp->r700AsmCode)) ) + { + return GL_FALSE; + } + vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 : (vp->r700AsmCode.number_used_registers - 1); -- cgit v1.2.3 From de460871605c5575c5513dd1283cb61710b60cfe Mon Sep 17 00:00:00 2001 From: Richard Li Date: Wed, 18 Nov 2009 14:43:59 -0500 Subject: r600 : add some defs --- src/mesa/drivers/dri/r600/r700_assembler.c | 35 ++++++++++++++++++++++++++++-- src/mesa/drivers/dri/r600/r700_assembler.h | 21 ++++++++++++++++++ src/mesa/drivers/dri/r600/r700_shader.c | 2 +- 3 files changed, 55 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 4b5d40bd3a..6e8d1cd927 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -39,6 +39,7 @@ #include "r700_assembler.h" #define USE_CF_FOR_CONTINUE_BREAK 1 +#define USE_CF_FOR_POP_AFTER 1 BITS addrmode_PVSDST(PVSDST * pPVSDST) { @@ -489,10 +490,12 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->unCallerArrayPointer = 0; pAsm->CALLSP = 0; - pAsm->CALLSTACK[0].FCSP_BeforeEntry; + pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0; pAsm->CALLSTACK[0].plstCFInstructions_local = &(pAsm->pR700Shader->lstCFInstructions); + pAsm->CALLSTACK[0].stackUsage.bits = 0; + SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local); pAsm->unCFflags = 0; @@ -4978,6 +4981,21 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) return GL_TRUE; } +inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason) +{ + switch (uReason) + { + case FC_PUSH_VPM: + break; + case FC_PUSH_WQM: + break; + case FC_LOOP: + break; + case FC_REP: + break; + }; +} + GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset) { if(GL_FALSE == add_cf_instruction(pAsm) ) @@ -5024,7 +5042,7 @@ GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) return GL_TRUE; } -GLboolean assemble_IF(r700_AssemblerBase *pAsm) +GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) { if(GL_FALSE == add_cf_instruction(pAsm) ) { @@ -5056,10 +5074,12 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm) pAsm->fc_stack[pAsm->FCSP].midLen= 0; pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; +#ifndef USE_CF_FOR_POP_AFTER if(GL_TRUE != bHasElse) { pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; } +#endif /* USE_CF_FOR_POP_AFTER */ pAsm->branch_depth++; @@ -5072,6 +5092,10 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm) GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) { +#ifdef USE_CF_FOR_POP_AFTER + pops(pAsm, 1); +#endif /* USE_CF_FOR_POP_AFTER */ + if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; @@ -5094,7 +5118,9 @@ GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr; //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1; +#ifndef USE_CF_FOR_POP_AFTER pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; +#endif /* USE_CF_FOR_POP_AFTER */ pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1; @@ -5103,6 +5129,10 @@ GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) { +#ifdef USE_CF_FOR_POP_AFTER + pops(pAsm, 1); +#endif /* USE_CF_FOR_POP_AFTER */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; if(NULL == pAsm->fc_stack[pAsm->FCSP].mid) @@ -5410,6 +5440,7 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP; pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local); + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.bits = 0; SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 85d32212c0..516923f67c 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -259,6 +259,8 @@ enum FC_IF = 1, FC_LOOP = 2, FC_REP = 3, + FC_PUSH_VPM = 4, + FC_PUSH_WQM = 5, COND_NONE = 0, COND_BOOL = 1, @@ -304,12 +306,30 @@ typedef struct CALLER_POINTER #define SQ_MAX_CALL_DEPTH 0x00000020 +typedef struct STACK_USAGE +{ + BITS pushs :8; + BITS current :8; + BITS max :8; +} STACK_USAGE; + +typedef union STACKDWORDtag +{ + BITS bits; + STACK_USAGE su; +} STACKDWORD; + typedef struct CALL_LEVEL { unsigned int FCSP_BeforeEntry; + STACKDWORD stackUsage; TypedShaderList * plstCFInstructions_local; } CALL_LEVEL; +#define HAS_CURRENT_LOOPRET 0x1L +#define HAS_LOOPRET 0x2L +#define LOOPRET_FLAGS HAS_LOOPRET | HAS_CURRENT_LOOPRET + typedef struct r700_AssemblerBase { R700ControlFlowSXClause* cf_last_export_ptr; @@ -429,6 +449,7 @@ typedef struct r700_AssemblerBase } r700_AssemblerBase; //Internal use +inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason); BITS addrmode_PVSDST(PVSDST * pPVSDST); void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode); void nomask_PVSDST(PVSDST * pPVSDST); diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index 2eed1acc2f..db951e48c4 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -140,7 +140,7 @@ void Init_R700_Shader(R700_Shader * pShader) pShader->killIsUsed = GL_FALSE; pShader->uCFOffset = 0; - pShader->uStackSize = 0; + pShader->uStackSize = 10; //richard test pShader->uMaxCallDepth = 0; pShader->bSurfAllocated = GL_FALSE; -- cgit v1.2.3 From 6345a7ba447d3e04b939ead6fee44fe9201ec2e3 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Thu, 19 Nov 2009 16:05:43 -0500 Subject: r600 : check in shader code test enable flag: if flag R600_ENABLE_GLSL_TEST defined, IL shader code will goto r600 assembler. The test base is /mesa/progs/glsl/brick, and changes shader code in CH06-brick.frag/vert to test different logic op combination. (if,else,while,function,...). The stack depth code is not in yet, so it is hard coded now. So complex code would not run (such as things like 8 loops embeded loop in loop). --- src/mesa/drivers/dri/r600/r600_context.c | 54 +++++++++++++++++++++++++++--- src/mesa/drivers/dri/r600/r700_assembler.c | 44 +++++++++++++++--------- 2 files changed, 77 insertions(+), 21 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index dbd233729c..ca0a670f3c 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -72,7 +72,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "vblank.h" #include "utils.h" -#include "xmlpool.h" /* for symbolic values of enum-type options */ +#include "xmlpool.h" /* for symbolic values of enum-type options */ + +//#define R600_ENABLE_GLSL_TEST 1 #define need_GL_VERSION_2_0 #define need_GL_ARB_occlusion_query @@ -154,8 +156,12 @@ static const struct dri_extension mm_extensions[] = { * The GL 2.0 functions are needed to make display lists work with * functions added by GL_ATI_separate_stencil. */ -static const struct dri_extension gl_20_extension[] = { - {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, +static const struct dri_extension gl_20_extension[] = { +#ifdef R600_ENABLE_GLSL_TEST + {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, +#else + {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, +#endif /* R600_ENABLE_GLSL_TEST */ }; static const struct tnl_pipeline_stage *r600_pipeline[] = { @@ -306,7 +312,28 @@ static void r600InitGLExtensions(GLcontext *ctx) driInitExtensions(ctx, card_extensions, GL_TRUE); if (r600->radeon.radeonScreen->kernel_mm) - driInitExtensions(ctx, mm_extensions, GL_FALSE); + driInitExtensions(ctx, mm_extensions, GL_FALSE); + +#ifdef R600_ENABLE_GLSL_TEST + driInitExtensions(ctx, gl_20_extension, GL_TRUE); + //_mesa_enable_2_0_extensions(ctx); + //1.5 + ctx->Extensions.ARB_occlusion_query = GL_TRUE; + ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE; + ctx->Extensions.EXT_shadow_funcs = GL_TRUE; + //2.0 + ctx->Extensions.ARB_draw_buffers = GL_TRUE; + ctx->Extensions.ARB_point_sprite = GL_TRUE; + ctx->Extensions.ARB_shader_objects = GL_TRUE; + ctx->Extensions.ARB_vertex_shader = GL_TRUE; + ctx->Extensions.ARB_fragment_shader = GL_TRUE; + ctx->Extensions.ARB_texture_non_power_of_two = GL_TRUE; + ctx->Extensions.EXT_blend_equation_separate = GL_TRUE; + ctx->Extensions.ATI_separate_stencil = GL_TRUE; + + /* glsl compiler has problem if this is not GL_TRUE */ + ctx->Shader.EmitCondCodes = GL_TRUE; +#endif /* R600_ENABLE_GLSL_TEST */ if (driQueryOptionb (&r600->radeon.optionCache, "disable_stencil_two_side")) @@ -341,7 +368,24 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, assert(glVisual); assert(driContextPriv); - assert(screen); + assert(screen); + + //richard test + FILE *pFile = NULL; + unsigned long ulByteToWrite = 0; + char szStr[1024]; + + pFile = fopen("//home//richard//rtp-log//func_call.log", "a+"); + if(NULL != pFile) + { + sprintf(szStr, "r600CreateContext \r\n"); + ulByteToWrite = strlen(szStr); + fwrite(szStr, 1, ulByteToWrite, pFile); + + fclose(pFile); + pFile = NULL; + } + //------------- /* Allocate the R600 context */ r600 = (context_t*) CALLOC(sizeof(*r600)); diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 6e8d1cd927..16ac920f29 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4983,17 +4983,31 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason) { - switch (uReason) - { - case FC_PUSH_VPM: - break; - case FC_PUSH_WQM: - break; - case FC_LOOP: - break; - case FC_REP: - break; - }; + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++; + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current++; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++; + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current += 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should += 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 1; + break; + }; + + if(pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs + > pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max) + { + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max = + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs; + } } GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset) @@ -5092,10 +5106,6 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) { -#ifdef USE_CF_FOR_POP_AFTER - pops(pAsm, 1); -#endif /* USE_CF_FOR_POP_AFTER */ - if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; @@ -5647,7 +5657,9 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) { return GL_FALSE; } -#endif +#endif + + checkStackDepth(pAsm, FC_PUSH_VPM); return GL_TRUE; } -- cgit v1.2.3 From 48dfd3938e428295c45692cfde0a2afff04a7970 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Thu, 19 Nov 2009 16:55:16 -0500 Subject: r600 : change shader pop method for now. --- src/mesa/drivers/dri/r600/r700_assembler.c | 54 +++++++++++++++--------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 16ac920f29..e3bc46802f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -39,7 +39,7 @@ #include "r700_assembler.h" #define USE_CF_FOR_CONTINUE_BREAK 1 -#define USE_CF_FOR_POP_AFTER 1 +//#define USE_CF_FOR_POP_AFTER 1 BITS addrmode_PVSDST(PVSDST * pPVSDST) { @@ -4983,30 +4983,30 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason) { - switch (uReason) - { - case FC_PUSH_VPM: - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++; - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current++; - break; - case FC_PUSH_WQM: - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++; - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current += 4; - break; - case FC_LOOP: - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 4; - break; - case FC_REP: - /* TODO : for 16 vp asic, should += 2; */ - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 1; - break; - }; - - if(pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs - > pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max) - { - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max = - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs; + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++; + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current++; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++; + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current += 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should += 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 1; + break; + }; + + if(pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs + > pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max) + { + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max = + pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs; } } @@ -5657,8 +5657,8 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) { return GL_FALSE; } -#endif - +#endif + checkStackDepth(pAsm, FC_PUSH_VPM); return GL_TRUE; -- cgit v1.2.3 From a02938a8421270389178d4969a5411a1691d929a Mon Sep 17 00:00:00 2001 From: Richard Li Date: Fri, 20 Nov 2009 16:36:55 -0500 Subject: r600 : use cf for all pop now, left optimization for future. --- src/mesa/drivers/dri/r600/r700_assembler.c | 56 ++++++++++++++++++++++-------- src/mesa/drivers/dri/r600/r700_assembler.h | 2 +- 2 files changed, 42 insertions(+), 16 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index e3bc46802f..27083a895c 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -39,7 +39,7 @@ #include "r700_assembler.h" #define USE_CF_FOR_CONTINUE_BREAK 1 -//#define USE_CF_FOR_POP_AFTER 1 +#define USE_CF_FOR_POP_AFTER 1 BITS addrmode_PVSDST(PVSDST * pPVSDST) { @@ -3022,7 +3022,6 @@ GLboolean next_ins2(r700_AssemblerBase *pAsm) pAsm->is_tex = GL_FALSE; pAsm->need_tex_barrier = GL_FALSE; - //richard nov.16 glsl pAsm->D2.bits = 0; return GL_TRUE; @@ -5377,32 +5376,35 @@ GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm) return GL_FALSE; } - unsigned int unFCSP = 0; + GLuint unFCSP; + GLuint unIF = 0; if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0) { for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) { if(FC_LOOP == pAsm->fc_stack[unFCSP].type) { + breakLoopOnFlag(pAsm, unFCSP); break; } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } } if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry) { - unFCSP = 0; - - returnOnFlag(pAsm); +#ifdef USE_CF_FOR_POP_AFTER + returnOnFlag(pAsm, unIF); +#else + returnOnFlag(pAsm, 0); +#endif /* USE_CF_FOR_POP_AFTER */ pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET; } } pAsm->branch_depth--; pAsm->FCSP--; - - if(unFCSP > 0) - { - breakLoopOnFlag(pAsm, unFCSP); - } return GL_TRUE; } @@ -5459,25 +5461,38 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) /* start sub */ pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_REP; + return GL_TRUE; } GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm) { + if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP) + { + radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n"); + return GL_FALSE; + } + pAsm->CALLSP--; SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->FCSP--; + return GL_TRUE; } GLboolean assemble_RET(r700_AssemblerBase *pAsm) { + GLuint unIF = 0; + if(pAsm->CALLSP > 0) { /* in sub */ - unsigned int unFCSP; + GLuint unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) { if(FC_LOOP == pAsm->fc_stack[unFCSP].type) @@ -5488,9 +5503,20 @@ GLboolean assemble_RET(r700_AssemblerBase *pAsm) return GL_TRUE; } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } } } - + +#ifdef USE_CF_FOR_POP_AFTER + if(unIF > 0) + { + pops(pAsm, unIF); + } +#endif /* USE_CF_FOR_POP_AFTER */ + add_return_inst(pAsm); return GL_TRUE; @@ -5664,12 +5690,12 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean returnOnFlag(r700_AssemblerBase *pAsm) +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF) { testFlag(pAsm); jumpToOffest(pAsm, 1, 4); setRetInLoopFlag(pAsm, SQ_SEL_0); - pops(pAsm, 1); + pops(pAsm, unIF + 1); add_return_inst(pAsm); return GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 516923f67c..ca562d29f1 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -553,7 +553,7 @@ GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset); GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue); GLboolean testFlag(r700_AssemblerBase *pAsm); GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP); -GLboolean returnOnFlag(r700_AssemblerBase *pAsm); +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF); GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); GLboolean assemble_ABS(r700_AssemblerBase *pAsm); -- cgit v1.2.3 From 1f8c23d9db84178f5b129dcd5f6dbae4a31f796a Mon Sep 17 00:00:00 2001 From: Richard Li Date: Sun, 22 Nov 2009 12:58:28 -0500 Subject: r600 : add stack depth calculation, enable CF pop. --- src/mesa/drivers/dri/r600/r700_assembler.c | 341 ++++++++++++++++++----------- src/mesa/drivers/dri/r600/r700_assembler.h | 41 ++-- src/mesa/drivers/dri/r600/r700_shader.c | 2 +- 3 files changed, 223 insertions(+), 161 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 27083a895c..16cdb741ae 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -388,99 +388,94 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->pR700Shader = pShader; pAsm->currentShaderType = spt; - pAsm->cf_last_export_ptr = NULL; + pAsm->cf_last_export_ptr = NULL; - pAsm->cf_current_export_clause_ptr = NULL; - pAsm->cf_current_alu_clause_ptr = NULL; - pAsm->cf_current_tex_clause_ptr = NULL; - pAsm->cf_current_vtx_clause_ptr = NULL; - pAsm->cf_current_cf_clause_ptr = NULL; + pAsm->cf_current_export_clause_ptr = NULL; + pAsm->cf_current_alu_clause_ptr = NULL; + pAsm->cf_current_tex_clause_ptr = NULL; + pAsm->cf_current_vtx_clause_ptr = NULL; + pAsm->cf_current_cf_clause_ptr = NULL; - // No clause has been created yet - pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + // No clause has been created yet + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; - pAsm->number_of_colorandz_exports = 0; - pAsm->number_of_exports = 0; - pAsm->number_of_export_opcodes = 0; + pAsm->number_of_colorandz_exports = 0; + pAsm->number_of_exports = 0; + pAsm->number_of_export_opcodes = 0; pAsm->alu_x_opcode = 0; pAsm->D2.bits = 0; - pAsm->D.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; - pAsm->uLastPosUpdate = 0; + pAsm->uLastPosUpdate = 0; - *(BITS *) &pAsm->fp_stOutFmt0 = 0; - - pAsm->uIIns = 0; - pAsm->uOIns = 0; - pAsm->number_used_registers = 0; - pAsm->uUsedConsts = 256; - + *(BITS *) &pAsm->fp_stOutFmt0 = 0; - // Fragment programs - pAsm->uBoolConsts = 0; - pAsm->uIntConsts = 0; - pAsm->uInsts = 0; - pAsm->uConsts = 0; + pAsm->uIIns = 0; + pAsm->uOIns = 0; + pAsm->number_used_registers = 0; + pAsm->uUsedConsts = 256; - pAsm->FCSP = 0; - pAsm->fc_stack[0].type = FC_NONE; - pAsm->branch_depth = 0; - pAsm->max_branch_depth = 0; + // Fragment programs + pAsm->uBoolConsts = 0; + pAsm->uIntConsts = 0; + pAsm->uInsts = 0; + pAsm->uConsts = 0; - pAsm->aArgSubst[0] = - pAsm->aArgSubst[1] = - pAsm->aArgSubst[2] = - pAsm->aArgSubst[3] = (-1); + pAsm->FCSP = 0; + pAsm->fc_stack[0].type = FC_NONE; - pAsm->uOutputs = 0; - - for (i=0; icolor_export_register_number[i] = (-1); - } + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = (-1); + pAsm->uOutputs = 0; - pAsm->depth_export_register_number = (-1); - pAsm->stencil_export_register_number = (-1); - pAsm->coverage_to_mask_export_register_number = (-1); - pAsm->mask_export_register_number = (-1); + for (i=0; icolor_export_register_number[i] = (-1); + } - pAsm->starting_export_register_number = 0; - pAsm->starting_vfetch_register_number = 0; - pAsm->starting_temp_register_number = 0; - pAsm->uFirstHelpReg = 0; + pAsm->depth_export_register_number = (-1); + pAsm->stencil_export_register_number = (-1); + pAsm->coverage_to_mask_export_register_number = (-1); + pAsm->mask_export_register_number = (-1); - pAsm->input_position_is_used = GL_FALSE; - pAsm->input_normal_is_used = GL_FALSE; + pAsm->starting_export_register_number = 0; + pAsm->starting_vfetch_register_number = 0; + pAsm->starting_temp_register_number = 0; + pAsm->uFirstHelpReg = 0; + pAsm->input_position_is_used = GL_FALSE; + pAsm->input_normal_is_used = GL_FALSE; - for (i=0; iinput_color_is_used[ i ] = GL_FALSE; - } + for (i=0; iinput_color_is_used[ i ] = GL_FALSE; + } - for (i=0; iinput_texture_unit_is_used[ i ] = GL_FALSE; - } + for (i=0; iinput_texture_unit_is_used[ i ] = GL_FALSE; + } - for (i=0; ivfetch_instruction_ptr_array[ i ] = NULL; - } + for (i=0; ivfetch_instruction_ptr_array[ i ] = NULL; + } - pAsm->number_of_inputs = 0; + pAsm->number_of_inputs = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; pAsm->subs = NULL; pAsm->unSubArraySize = 0; @@ -494,13 +489,14 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->CALLSTACK[0].plstCFInstructions_local = &(pAsm->pR700Shader->lstCFInstructions); - pAsm->CALLSTACK[0].stackUsage.bits = 0; + pAsm->CALLSTACK[0].max = 0; + pAsm->CALLSTACK[0].current = 0; SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local); pAsm->unCFflags = 0; - return 0; + return 0; } GLboolean IsTex(gl_inst_opcode Opcode) @@ -4980,32 +4976,74 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) return GL_TRUE; } -inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason) +inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason) +{ + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current--; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should -= 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current -= 1; + break; + }; +} + +inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly) { + if(GL_TRUE == bCheckMaxOnly) + { + switch (uReason) + { + case FC_PUSH_VPM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 1; + } + break; + case FC_PUSH_WQM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 4; + } + break; + } + return; + } + switch (uReason) { case FC_PUSH_VPM: - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++; - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current++; + pAsm->CALLSTACK[pAsm->CALLSP].current++; break; case FC_PUSH_WQM: - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++; - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current += 4; + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; break; case FC_LOOP: - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 4; + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; break; case FC_REP: /* TODO : for 16 vp asic, should += 2; */ - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 1; + pAsm->CALLSTACK[pAsm->CALLSP].current += 1; break; }; - if(pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs - > pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max) + if(pAsm->CALLSTACK[pAsm->CALLSP].current + > pAsm->CALLSTACK[pAsm->CALLSP].max) { - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max = - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs; + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current; } } @@ -5082,7 +5120,6 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) pAsm->FCSP++; pAsm->fc_stack[pAsm->FCSP].type = FC_IF; - pAsm->fc_stack[pAsm->FCSP].bpush = 0; pAsm->fc_stack[pAsm->FCSP].mid = NULL; pAsm->fc_stack[pAsm->FCSP].midLen= 0; pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; @@ -5094,12 +5131,8 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) } #endif /* USE_CF_FOR_POP_AFTER */ - pAsm->branch_depth++; + checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE); - if(pAsm->branch_depth > pAsm->max_branch_depth) - { - pAsm->max_branch_depth = pAsm->branch_depth; - } return GL_TRUE; } @@ -5164,9 +5197,11 @@ GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) radeon_error("if/endif in shader code are not paired. \n"); return GL_FALSE; } - pAsm->branch_depth--; + pAsm->FCSP--; + decreaseCurrent(pAsm, FC_PUSH_VPM); + return GL_TRUE; } @@ -5191,18 +5226,13 @@ GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) pAsm->FCSP++; pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP; - pAsm->fc_stack[pAsm->FCSP].bpush = 1; pAsm->fc_stack[pAsm->FCSP].mid = NULL; pAsm->fc_stack[pAsm->FCSP].unNumMid = 0; pAsm->fc_stack[pAsm->FCSP].midLen = 0; pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; - pAsm->branch_depth++; + checkStackDepth(pAsm, FC_LOOP, GL_FALSE); - if(pAsm->branch_depth > pAsm->max_branch_depth) - { - pAsm->max_branch_depth = pAsm->branch_depth; - } return GL_TRUE; } @@ -5266,6 +5296,8 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm) pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + #endif //USE_CF_FOR_CONTINUE_BREAK return GL_TRUE; } @@ -5330,6 +5362,8 @@ GLboolean assemble_CONT(r700_AssemblerBase *pAsm) pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + #endif /* USE_CF_FOR_CONTINUE_BREAK */ return GL_TRUE; @@ -5403,8 +5437,9 @@ GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm) } } - pAsm->branch_depth--; pAsm->FCSP--; + + decreaseCurrent(pAsm, FC_LOOP); return GL_TRUE; } @@ -5445,14 +5480,16 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; - pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; - pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; pAsm->CALLSP++; + pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer; pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP; pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local); - pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.bits = 0; + pAsm->CALLSTACK[pAsm->CALLSP].max = 0; + pAsm->CALLSTACK[pAsm->CALLSP].current = 0; SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); @@ -5462,7 +5499,9 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) pAsm->alu_x_opcode = SQ_CF_INST_ALU; pAsm->FCSP++; - pAsm->fc_stack[pAsm->FCSP].type = FC_REP; + pAsm->fc_stack[pAsm->FCSP].type = FC_REP; + + checkStackDepth(pAsm, FC_REP, GL_FALSE); return GL_TRUE; } @@ -5475,6 +5514,12 @@ GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm) return GL_FALSE; } + /* copy max to sub structure */ + pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax + = pAsm->CALLSTACK[pAsm->CALLSP].max; + + decreaseCurrent(pAsm, FC_REP); + pAsm->CALLSP--; SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); @@ -5565,18 +5610,42 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, pAsm->unCallerArrayPointer++; int j; + GLuint max; + GLuint unSubID; + GLboolean bRet; for(j=0; junSubArrayPointer; j++) { if(nILindex == pAsm->subs[j].subIL_Offset) { /* compiled before */ + + max = pAsm->subs[j].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j; return GL_TRUE; } } pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; + unSubID = pAsm->unSubArrayPointer; + + bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm); + + if(GL_TRUE == bRet) + { + max = pAsm->subs[unSubID].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + } - return AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm); + return bRet; } GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) @@ -5685,7 +5754,7 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) } #endif - checkStackDepth(pAsm, FC_PUSH_VPM); + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); return GL_TRUE; } @@ -5704,7 +5773,7 @@ GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF) GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) { testFlag(pAsm); - + //break if(GL_FALSE == add_cf_instruction(pAsm) ) { @@ -5831,7 +5900,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) return GL_FALSE; break; - //case OP_FLR_INT: + //case OP_FLR_INT: ; + // if ( GL_FALSE == assemble_FLR_INT() ) // return GL_FALSE; // break; @@ -6351,11 +6421,47 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) R700ShaderInstruction * pInst; R700ControlFlowGenericClause * pCFInst; + /* remove flags init if they are not used */ + if((pAsm->unCFflags & HAS_LOOPRET) == 0) + { + R700ControlFlowALUClause * pCF_ALU; + pInst = plstCFmain->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + if(1 == pCF_ALU->m_Word1.f.count) + { + pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; + } + else + { + R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction; + + pALU->m_pLinkedALUClause = NULL; + pALU = (R700ALUInstruction *)(pALU->pNextInst); + pALU->m_pLinkedALUClause = pCF_ALU; + pCF_ALU->m_pLinkedALUInstruction = pALU; + + pCF_ALU->m_Word1.f.count--; + } + break; + } + pInst = pInst->pNextInst; + }; + } + if(0 == pAsm->unSubArrayPointer) { return GL_TRUE; } + if(pAsm->CALLSTACK[0].max > 0) + { + pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; + } + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; unCFoffset = plstCFmain->uNumOfNode; @@ -6411,37 +6517,6 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; } - /* remove flags init if they are not used */ - if((pAsm->unCFflags & HAS_LOOPRET) == 0) - { - R700ControlFlowALUClause * pCF_ALU; - pInst = plstCFmain->pHead; - while(pInst) - { - if(SIT_CF_ALU == pInst->m_ShaderInstType) - { - pCF_ALU = (R700ControlFlowALUClause *)pInst; - if(1 == pCF_ALU->m_Word1.f.count) - { - pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; - } - else - { - R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction; - - pALU->m_pLinkedALUClause = NULL; - pALU = (R700ALUInstruction *)(pALU->pNextInst); - pALU->m_pLinkedALUClause = pCF_ALU; - pCF_ALU->m_pLinkedALUInstruction = pALU; - - pCF_ALU->m_Word1.f.count--; - } - break; - } - pInst = pInst->pNextInst; - }; - } - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index ca562d29f1..7efb346fa7 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -273,27 +273,27 @@ enum typedef struct FC_LEVEL { - R700ControlFlowGenericClause * first; + R700ControlFlowGenericClause * first; R700ControlFlowGenericClause ** mid; unsigned int unNumMid; - unsigned int midLen; - unsigned int type; - unsigned int cond; - unsigned int inv; - unsigned int bpush; ///< 1 if first instruction does branch stack push - int id; ///< id of bool or int variable + unsigned int midLen; + unsigned int type; + unsigned int cond; + unsigned int inv; + int id; ///< id of bool or int variable } FC_LEVEL; typedef struct VTX_FETCH_METHOD { - GLboolean bEnableMini; - GLuint mega_fetch_remainder; + GLboolean bEnableMini; + GLuint mega_fetch_remainder; } VTX_FETCH_METHOD; typedef struct SUB_OFFSET { GLint subIL_Offset; GLuint unCFoffset; + GLuint unStackDepthMax; TypedShaderList lstCFInstructions_local; } SUB_OFFSET; @@ -306,23 +306,12 @@ typedef struct CALLER_POINTER #define SQ_MAX_CALL_DEPTH 0x00000020 -typedef struct STACK_USAGE -{ - BITS pushs :8; - BITS current :8; - BITS max :8; -} STACK_USAGE; - -typedef union STACKDWORDtag -{ - BITS bits; - STACK_USAGE su; -} STACKDWORD; - typedef struct CALL_LEVEL { unsigned int FCSP_BeforeEntry; - STACKDWORD stackUsage; + GLint subDescIndex; + GLushort current; + GLushort max; TypedShaderList * plstCFInstructions_local; } CALL_LEVEL; @@ -386,9 +375,6 @@ typedef struct r700_AssemblerBase unsigned int FCSP; FC_LEVEL fc_stack[32]; - unsigned int branch_depth; - unsigned int max_branch_depth; - //----------------------------------------------------------------------------------- // ArgSubst used in Assemble_Source() function //----------------------------------------------------------------------------------- @@ -449,7 +435,8 @@ typedef struct r700_AssemblerBase } r700_AssemblerBase; //Internal use -inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason); +inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly); +inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason); BITS addrmode_PVSDST(PVSDST * pPVSDST); void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode); void nomask_PVSDST(PVSDST * pPVSDST); diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index db951e48c4..2eed1acc2f 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -140,7 +140,7 @@ void Init_R700_Shader(R700_Shader * pShader) pShader->killIsUsed = GL_FALSE; pShader->uCFOffset = 0; - pShader->uStackSize = 10; //richard test + pShader->uStackSize = 0; pShader->uMaxCallDepth = 0; pShader->bSurfAllocated = GL_FALSE; -- cgit v1.2.3 From f9b0f1dfa1695db79553f67fd0c156d445062ffa Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 23 Nov 2009 06:31:29 +1000 Subject: r600: fix inline issues --- src/mesa/drivers/dri/r600/r700_assembler.c | 4 ++-- src/mesa/drivers/dri/r600/r700_assembler.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 16cdb741ae..c46dd757d0 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4976,7 +4976,7 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) return GL_TRUE; } -inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason) +static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason) { switch (uReason) { @@ -4996,7 +4996,7 @@ inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason) }; } -inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly) +static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly) { if(GL_TRUE == bCheckMaxOnly) { diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 7efb346fa7..3e4106335a 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -435,8 +435,6 @@ typedef struct r700_AssemblerBase } r700_AssemblerBase; //Internal use -inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly); -inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason); BITS addrmode_PVSDST(PVSDST * pPVSDST); void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode); void nomask_PVSDST(PVSDST * pPVSDST); -- cgit v1.2.3 From c3c8c40cab193e0aa0f1a42bff7b0d726df8cf9f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 23 Nov 2009 06:44:29 +1000 Subject: r600: hopefully fix segfault. --- src/mesa/drivers/dri/r600/r700_assembler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index c46dd757d0..702add9772 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -6421,6 +6421,8 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) R700ShaderInstruction * pInst; R700ControlFlowGenericClause * pCFInst; + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; + /* remove flags init if they are not used */ if((pAsm->unCFflags & HAS_LOOPRET) == 0) { @@ -6462,7 +6464,6 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; } - plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; unCFoffset = plstCFmain->uNumOfNode; /* Reloc subs */ -- cgit v1.2.3 From a12b468d002edb6c8a7c95882edd3e5e7f615a4e Mon Sep 17 00:00:00 2001 From: Richard Li Date: Sun, 22 Nov 2009 21:31:46 -0500 Subject: r600 : add support for shader instruction trunc and discard. --- src/mesa/drivers/dri/r600/r700_assembler.c | 105 +++++++++++++++++++++-------- src/mesa/drivers/dri/r600/r700_assembler.h | 2 +- 2 files changed, 78 insertions(+), 29 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 702add9772..8e57396a0d 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -340,7 +340,10 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) switch (pAsm->D.dst.opcode) { case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLE: case SQ_OP2_INST_KILLGT: + case SQ_OP2_INST_KILLGE: + case SQ_OP2_INST_KILLNE: case SQ_OP2_INST_MUL: case SQ_OP2_INST_MAX: case SQ_OP2_INST_MIN: @@ -363,6 +366,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_MOVA_FLOOR: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: + case SQ_OP2_INST_TRUNC: case SQ_OP2_INST_EXP_IEEE: case SQ_OP2_INST_LOG_CLAMPED: case SQ_OP2_INST_LOG_IEEE: @@ -1379,19 +1383,16 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) case FRAG_ATTRIB_PNTC: fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); break; - case FRAG_ATTRIB_VAR0: - fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n"); - break; } if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || - (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) - { + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { bValidTexCoord = GL_TRUE; pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; - } + } break; } @@ -2469,9 +2470,9 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) { R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } + { + return GL_FALSE; + } Init_R700ALUInstruction(alu_instruction_ptr); //src 0 @@ -3545,13 +3546,12 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_KIL(r700_AssemblerBase *pAsm) -{ - /* TODO: doc says KILL has to be last(end) ALU clause */ - - checkop1(pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) +{ + checkop2(pAsm); - pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -3561,24 +3561,24 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = 0; - - setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); - noneg_PVSSRC(&(pAsm->S[0].src)); + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } - if ( GL_FALSE == assemble_src(pAsm, 0, 1) ) + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) { return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if ( GL_FALSE == next_ins2(pAsm) ) { return GL_FALSE; } + /* Doc says KILL has to be last(end) ALU clause */ pAsm->pR700Shader->killIsUsed = GL_TRUE; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; return GL_TRUE; } @@ -5018,7 +5018,7 @@ static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLb pAsm->CALLSTACK[pAsm->CALLSP].current + 4; } break; - } + } return; } @@ -5102,7 +5102,7 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) if(GL_TRUE != bHasElse) { - pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; } else { @@ -5172,7 +5172,7 @@ GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_POP_AFTER - pops(pAsm, 1); + pops(pAsm, 1); #endif /* USE_CF_FOR_POP_AFTER */ pAsm->alu_x_opcode = SQ_CF_INST_ALU; @@ -5912,8 +5912,10 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_KIL: - if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) - return GL_FALSE; + case OPCODE_KIL_NV: + /* done at OPCODE_SE/SGT...etc. */ + /* if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) + return GL_FALSE; */ break; case OPCODE_LG2: if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) @@ -6008,6 +6010,13 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; } } + else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + { + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLE) ) + { + return GL_FALSE; + } + } else { if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) @@ -6051,6 +6060,13 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; } } + else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + { + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) + { + return GL_FALSE; + } + } else { if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) @@ -6094,6 +6110,13 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; } } + else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + { + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) ) + { + return GL_FALSE; + } + } else { if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) @@ -6150,6 +6173,13 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; } } + else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + { + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) + { + return GL_FALSE; + } + } else { if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) @@ -6210,6 +6240,13 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; } } + else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + { + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) ) + { + return GL_FALSE; + } + } else { if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) @@ -6257,6 +6294,13 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; } } + else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + { + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLNE) ) + { + return GL_FALSE; + } + } else { if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) @@ -6298,6 +6342,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; + case OPCODE_TRUNC: + if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) ) + return GL_FALSE; + break; + case OPCODE_XPD: if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) return GL_FALSE; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 3e4106335a..130fc89dae 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -554,7 +554,7 @@ GLboolean assemble_EXP(r700_AssemblerBase *pAsm); GLboolean assemble_FLR(r700_AssemblerBase *pAsm); GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm); GLboolean assemble_FRC(r700_AssemblerBase *pAsm); -GLboolean assemble_KIL(r700_AssemblerBase *pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode); GLboolean assemble_LG2(r700_AssemblerBase *pAsm); GLboolean assemble_LRP(r700_AssemblerBase *pAsm); GLboolean assemble_LOG(r700_AssemblerBase *pAsm); -- cgit v1.2.3 From d4c2f53ca56beb8fe9289fb17c3f5fcc2cc7dc10 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Tue, 24 Nov 2009 12:16:39 -0500 Subject: r600 : fix stack depth setting bug. --- src/mesa/drivers/dri/r600/r700_assembler.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 8e57396a0d..0c16594adc 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -6503,14 +6503,14 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) }; } - if(0 == pAsm->unSubArrayPointer) + if(pAsm->CALLSTACK[0].max > 0) { - return GL_TRUE; + pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; } - if(pAsm->CALLSTACK[0].max > 0) + if(0 == pAsm->unSubArrayPointer) { - pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; + return GL_TRUE; } unCFoffset = plstCFmain->uNumOfNode; -- cgit v1.2.3 From 11dce740305ea3f45966a9e9f72ba94b4eae6d40 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Tue, 24 Nov 2009 16:00:25 -0500 Subject: r600 : reset stack flag with one channel only. --- src/mesa/drivers/dri/r600/r700_assembler.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 0c16594adc..ba97d3e073 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -5663,6 +5663,8 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->D2.dst2.literal = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 0; + /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ + pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */ #if 0 pAsm->S[0].src.rtype = SRC_REC_LITERAL; //pAsm->S[0].src.reg = 0; @@ -6457,6 +6459,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) { setRetInLoopFlag(pAsm, SQ_SEL_0); + pAsm->alu_x_opcode = SQ_CF_INST_ALU; return GL_TRUE; } @@ -6482,7 +6485,7 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) if(SIT_CF_ALU == pInst->m_ShaderInstType) { pCF_ALU = (R700ControlFlowALUClause *)pInst; - if(1 == pCF_ALU->m_Word1.f.count) + if(0 == pCF_ALU->m_Word1.f.count) { pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; } -- cgit v1.2.3 From 2db72f329f35ee6e12df3ed472de2ee72cf23399 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Sun, 29 Nov 2009 12:12:19 -0500 Subject: r600 : add read port allocation for uniform; mapping ps input based on vs output; fix bugs including constants updating for vs. --- src/mesa/drivers/dri/r600/r700_assembler.c | 46 +++++++++------- src/mesa/drivers/dri/r600/r700_fragprog.c | 85 +++++++++++++++++------------- src/mesa/drivers/dri/r600/r700_fragprog.h | 6 ++- src/mesa/drivers/dri/r600/r700_vertprog.c | 26 +++++++-- 4 files changed, 101 insertions(+), 62 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index ba97d3e073..309c90fdd0 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1038,7 +1038,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -1049,7 +1050,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -1082,7 +1084,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -1093,7 +1096,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -1104,7 +1108,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[1] = GL_FALSE; } - if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) ) @@ -1218,7 +1223,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; break; case PROGRAM_INPUT: - setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); pAsm->S[fld].src.rtype = SRC_REG_INPUT; switch (pAsm->currentShaderType) { @@ -1346,6 +1351,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) else { switch (pILInst->SrcReg[0].File) { + case PROGRAM_UNIFORM: case PROGRAM_CONSTANT: case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: @@ -2117,7 +2123,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, if( is_gpr(sel) ) { if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) ) - { + { return GL_FALSE; } @@ -2129,7 +2135,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, else { if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) - { + { return GL_FALSE; } } @@ -2141,7 +2147,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, if( is_cfile(sel) ) { if( GL_FALSE == reserve_cfile(pAsm, sel, chan) ) - { + { return GL_FALSE; } } @@ -2244,7 +2250,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } @@ -2258,7 +2264,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } } @@ -2287,7 +2293,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; } else - { + { radeon_error("Only temp destination registers supported for ALU dest regs.\n"); return GL_FALSE; } @@ -2401,7 +2407,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { + { return GL_FALSE; } @@ -2412,15 +2418,15 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) if (is_single_scalar_operation) { if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { + { return GL_FALSE; } } else { if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return 1; + { + return GL_FALSE; } } @@ -2667,7 +2673,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) { if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) { - return 1; + return GL_FALSE; } } @@ -3642,6 +3648,7 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm) { return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) { return GL_FALSE; @@ -4598,6 +4605,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) { + case PROGRAM_UNIFORM: case PROGRAM_CONSTANT: case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: @@ -6867,7 +6875,7 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, export_starting_index++; } } - + for(i=VERT_RESULT_VAR0; iucVP_OutputMap[i], GL_FALSE) ) - { + { return GL_FALSE; } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 21ac46e7b8..e9ef6c8695 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -44,12 +44,18 @@ //TODO : Validate FP input with VP output. void Map_Fragment_Program(r700_AssemblerBase *pAsm, - struct gl_fragment_program *mesa_fp) + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) { unsigned int unBit; unsigned int i; GLuint ui; + /* match fp inputs with vp exports. */ + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + pAsm->number_used_registers = 0; //Input mapping : mesa_fp->Base.InputsRead set the flag, set in @@ -61,41 +67,41 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_COL0; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_COL1; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_FOGC; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) { - pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; } for(i=0; i<8; i++) { - unBit = 1 << (FRAG_ATTRIB_TEX0 + i); - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++; } } -/* order has been taken care of */ +/* order has been taken care of */ #if 1 - for(i=FRAG_ATTRIB_VAR0; iBase.InputsRead & unBit) + if(OutputsWritten & unBit) { - pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers++; + pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++; } } #else @@ -291,7 +297,8 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, } GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_fp) + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) { GLuint number_of_colors_exported; GLboolean z_enabled = GL_FALSE; @@ -299,7 +306,7 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, //Init_Program Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); - Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp); + Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) ) { @@ -366,7 +373,7 @@ void r700SelectFragmentShader(GLcontext *ctx) } if (GL_FALSE == fp->translated) - r700TranslateFragmentShader(fp, &(fp->mesa_program)); + r700TranslateFragmentShader(fp, &(fp->mesa_program), ctx); } void * r700GetActiveFpShaderBo(GLcontext * ctx) @@ -460,6 +467,9 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) EXPORT_MODE_shift, EXPORT_MODE_mask); // emit ps input map + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; unBit = 1 << FRAG_ATTRIB_WPOS; if(mesa_fp->Base.InputsRead & unBit) { @@ -473,8 +483,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_COL0; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -486,8 +496,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_COL1; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -499,8 +509,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_FOGC; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -514,8 +524,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) for(i=0; i<8; i++) { - unBit = 1 << (FRAG_ATTRIB_TEX0 + i); - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -525,12 +535,12 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } } - for(i=FRAG_ATTRIB_VAR0; iBase.InputsRead & unBit) + unBit = 1 << i; + if(OutputsWritten & unBit) { - ui = pAsm->uiFP_AttributeMap[i]; + ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); @@ -538,8 +548,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); else CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - } - } + } + } exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1)) @@ -551,7 +561,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) /* sent out shader constants. */ paramList = fp->mesa_program.Base.Parameters; - if(NULL != paramList) { + if(NULL != paramList) + { _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) @@ -564,10 +575,10 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters; for(ui=0; uips.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; } } else r700->ps.num_consts = 0; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h index cbb108d212..843de2c029 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.h +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -49,12 +49,14 @@ struct r700_fragment_program /* Internal */ void Map_Fragment_Program(r700_AssemblerBase *pAsm, - struct gl_fragment_program *mesa_fp); + struct gl_fragment_program *mesa_fp, + GLcontext *ctx); //richard glsl nov.27 GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, struct gl_fragment_program *mesa_fp); GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_vp); + struct gl_fragment_program *mesa_vp, + GLcontext *ctx); //richard glsl nov.27 /* Interface */ extern void r700SelectFragmentShader(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index c8f72d588b..7715214da1 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -631,6 +631,14 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) paramList = vp->mesa_program->Base.Parameters; if(NULL != paramList) { + //vp->mesa_program was cloned, not updated by glsl shader api. + //_mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current + // so, use ctx->VertexProgem._Current + struct gl_program_parameter_list *paramListOrginal = + paramListOrginal = ctx->VertexProgram._Current->Base.Parameters; + + //--------------------------- + _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) @@ -643,10 +651,20 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters; for(ui=0; uivs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) + { + r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + } + else + { + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + } } } else r700->vs.num_consts = 0; -- cgit v1.2.3 From 369669ff9a7ff7636cadef8e2b13f2f28face98f Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Thu, 3 Dec 2009 12:26:44 +0200 Subject: r600: add support for TXB instruction makes testing other things easier - does not hang the card TODO: enable TEX dependency tracking in vertex programs --- src/mesa/drivers/dri/r600/r700_assembler.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 6ff08e1cfb..be875ae6b8 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3450,22 +3450,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) need_barrier = GL_TRUE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) - { - case OPCODE_TEX: - break; - case OPCODE_TXB: - radeon_error("do not support TXB yet\n"); - return GL_FALSE; - break; - case OPCODE_TXP: - break; - default: - radeon_error("Internal error: bad texture op (not TEX)\n"); - return GL_FALSE; - break; - } - if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { GLuint tmp = gethelpr(pAsm); @@ -3644,7 +3628,15 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB) + { + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; + } + else + { + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + } + pAsm->is_tex = GL_TRUE; if ( GL_TRUE == need_barrier ) { -- cgit v1.2.3 From 94c6ec5809b08676f12628b49dd88ec694d07a48 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Thu, 3 Dec 2009 18:12:45 +0200 Subject: r600: execute SET funtions on all channels seems assemble_LOGIC was meant for non-condition-code instructions so execute in for all components as previously --- src/mesa/drivers/dri/r600/r700_assembler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index cf64d170ed..fe006ef19c 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4467,7 +4467,7 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) } pAsm->D.dst.opcode = opcode; - pAsm->D.dst.math = 1; + //pAsm->D.dst.math = 1; if( GL_FALSE == assemble_dst(pAsm) ) { -- cgit v1.2.3 From c1d79a4235fa2edb05e92f9b93a105ff356a4a18 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 4 Dec 2009 11:37:15 +0200 Subject: r600: wip glsl - refactor conditional instructions a bit remember the dst register which is used for cond updates when it's time to use the cond codes issue a separate PRED instruction --- src/mesa/drivers/dri/r600/r700_assembler.c | 379 ++++++----------------------- src/mesa/drivers/dri/r600/r700_assembler.h | 1 + 2 files changed, 70 insertions(+), 310 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index fe006ef19c..87c1638de4 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3557,7 +3557,7 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) checkop2(pAsm); pAsm->D.dst.opcode = opcode; - pAsm->D.dst.math = 1; + //pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -3567,17 +3567,23 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) { return GL_FALSE; } - if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + /*if( GL_FALSE == assemble_src(pAsm, 1, -1) ) { return GL_FALSE; } - - if ( GL_FALSE == next_ins2(pAsm) ) + */ + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -4494,30 +4500,32 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) { - if( GL_FALSE == checkop2(pAsm) ) - { - return GL_FALSE; - } + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; pAsm->D.dst.predicated = 1; - pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->uHelpReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number; + pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7; + noneg_PVSSRC(&(pAsm->S[0].src)); - if( GL_FALSE == assemble_src(pAsm, 1, -1) ) - { - return GL_FALSE; - } + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = pAsm->uHelpReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; if( GL_FALSE == next_ins2(pAsm) ) { @@ -5098,6 +5106,11 @@ GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; @@ -5242,6 +5255,11 @@ GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) GLboolean assemble_BRK(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_CONTINUE_BREAK + + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + unsigned int unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) { @@ -5308,6 +5326,10 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm) GLboolean assemble_CONT(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_CONTINUE_BREAK + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + unsigned int unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) { @@ -5848,6 +5870,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst, } } #endif + if(pILInst[i].CondUpdate == 1) + { + /* remember dest register used for cond evaluation */ + /* XXX also handle PROGRAM_OUTPUT registers here? */ + pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; + } switch (pILInst[i].Opcode) { @@ -5918,9 +5946,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_KIL: case OPCODE_KIL_NV: - /* done at OPCODE_SE/SGT...etc. */ - /* if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) - return GL_FALSE; */ + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) + return GL_FALSE; break; case OPCODE_LG2: if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) @@ -5983,151 +6010,23 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_SEQ: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; case OPCODE_SGT: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; case OPCODE_SGE: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) - { - return GL_FALSE; - } + if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + { + return GL_FALSE; } break; @@ -6139,61 +6038,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst, SrcRegSave[1] = pILInst[i].SrcReg[1]; pILInst[i].SrcReg[0] = SrcRegSave[1]; pILInst[i].SrcReg[1] = SrcRegSave[0]; - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) - { - return GL_FALSE; - } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } pILInst[i].SrcReg[0] = SrcRegSave[0]; pILInst[i].SrcReg[1] = SrcRegSave[1]; } @@ -6206,60 +6056,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, SrcRegSave[1] = pILInst[i].SrcReg[1]; pILInst[i].SrcReg[0] = SrcRegSave[1]; pILInst[i].SrcReg[1] = SrcRegSave[0]; - if(OPCODE_IF == pILInst[i+1].Opcode) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; } pILInst[i].SrcReg[0] = SrcRegSave[0]; pILInst[i].SrcReg[1] = SrcRegSave[1]; @@ -6267,51 +6068,9 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_SNE: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLNE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 130fc89dae..ef1f924add 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -345,6 +345,7 @@ typedef struct r700_AssemblerBase PVSDWORD S[3]; unsigned int uLastPosUpdate; + unsigned int last_cond_register; OUT_FRAGMENT_FMT_0 fp_stOutFmt0; -- cgit v1.2.3 From 323d1fb3910d7e53cb5200ee90849b2231fd96fb Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 4 Dec 2009 12:58:36 +0200 Subject: r600: quick hack to get KIL_NV working - does condition TR only for now --- src/mesa/drivers/dri/r600/r700_assembler.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 87c1638de4..3738edbc2f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3554,7 +3554,10 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) { - checkop2(pAsm); + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if(pILInst->Opcode == OPCODE_KIL) + checkop1(pAsm); pAsm->D.dst.opcode = opcode; //pAsm->D.dst.math = 1; @@ -3573,16 +3576,23 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); noneg_PVSSRC(&(pAsm->S[0].src)); - if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + if(pILInst->Opcode == OPCODE_KIL_NV) { - return GL_FALSE; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + neg_PVSSRC(&(pAsm->S[1].src)); } - - /*if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + else { - return GL_FALSE; + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + } - */ + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; -- cgit v1.2.3 From 50ab51101e4c5e64ab92c260c324fb66a98851a8 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 4 Dec 2009 16:36:41 +0200 Subject: r600: glsl - allow specifying texture sampler via uniforms looks kinda hackish, should rethink later --- src/mesa/drivers/dri/r600/r700_assembler.c | 2 +- src/mesa/drivers/dri/r600/r700_assembler.h | 1 + src/mesa/drivers/dri/r600/r700_fragprog.c | 5 +++++ src/mesa/drivers/dri/r600/r700_vertprog.c | 4 ++++ 4 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 3738edbc2f..158c5fa549 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4840,7 +4840,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->need_tex_barrier = GL_TRUE; } // Set src1 to tex unit id - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; //No sw info from mesa compiler, so hard code here. diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index ef1f924add..48ffef501f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -416,6 +416,7 @@ typedef struct r700_AssemblerBase SHADER_PIPE_TYPE currentShaderType; struct prog_instruction * pILInst; GLuint uiCurInst; + GLubyte SamplerUnits[MAX_SAMPLERS]; GLboolean bR6xx; /* helper to decide which type of instruction to assemble */ GLboolean is_tex; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 0cb9707ee6..8eb439a951 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -308,6 +308,7 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, GLuint number_of_colors_exported; GLboolean z_enabled = GL_FALSE; GLuint unBit; + int i; //Init_Program Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); @@ -320,6 +321,10 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, InitShaderProgram(&(fp->r700AsmCode)); + for(i=0; i < MAX_SAMPLERS; i++) + { + fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i]; + } if( GL_FALSE == AssembleInstr(0, mesa_fp->Base.NumInstructions, &(mesa_fp->Base.Instructions[0]), diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index d3d1da7959..759b74dc7e 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -337,6 +337,10 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, InitShaderProgram(&(vp->r700AsmCode)); + for(i=0; i < MAX_SAMPLERS; i++) + { + vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i]; + } if(GL_FALSE == AssembleInstr(0, vp->mesa_program->Base.NumInstructions, &(vp->mesa_program->Base.Instructions[0]), -- cgit v1.2.3 From 17e212e2631cd652c28378399806c3b3bd293e9a Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 7 Dec 2009 11:51:36 +0200 Subject: r600: add ABS support for source regs to assembler use it in tex cube instruction sequence --- src/mesa/drivers/dri/r600/r700_assembler.c | 27 +++++---------------------- src/mesa/drivers/dri/r600/r700_assembler.h | 7 ++++--- 2 files changed, 9 insertions(+), 25 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 158c5fa549..2f8038adb3 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2350,8 +2350,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; @@ -2379,8 +2379,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; @@ -4721,24 +4721,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently - * have to do explicit instruction - */ - pAsm->D.dst.opcode = SQ_OP2_INST_MAX; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp1; - pAsm->D.dst.writez = 1; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp1; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].bits = pAsm->S[0].bits; - flipneg_PVSSRC(&(pAsm->S[1].src)); - - next_ins(pAsm); - /* tmp1.z = RCP_e(|tmp1.z|) */ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; pAsm->D.dst.math = 1; @@ -4751,6 +4733,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = tmp1; pAsm->S[0].src.swizzlex = SQ_SEL_Z; + pAsm->S[0].src.abs = 1; next_ins(pAsm); diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 48ffef501f..cfa2610a55 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -120,14 +120,15 @@ typedef struct PVSINSTtag typedef struct PVSSRCtag { - BITS rtype:4; + BITS rtype:3; BITS addrmode0:1; - BITS reg:10; //15 (8) + BITS reg:10; //14 (8) BITS swizzlex:3; BITS swizzley:3; BITS swizzlez:3; - BITS swizzlew:3; //27 + BITS swizzlew:3; //26 + BITS abs:1; BITS negx:1; BITS negy:1; BITS negz:1; -- cgit v1.2.3 From 602ba357edd640e0db17911b39d3ecfbf5675230 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 7 Dec 2009 13:04:32 +0200 Subject: r600: merge alu_instruction/alu_instruction2 --- src/mesa/drivers/dri/r600/r700_assembler.c | 320 +++-------------------------- src/mesa/drivers/dri/r600/r700_assembler.h | 6 +- 2 files changed, 29 insertions(+), 297 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2f8038adb3..8155d53eeb 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1292,6 +1292,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + if(pILInst->SaturateMode == SATURATE_ZERO_ONE) + { + pAsm->D2.dst2.SaturateMode = 1; + } + else + { + pAsm->D2.dst2.SaturateMode = 0; + } + return GL_TRUE; } @@ -2270,7 +2279,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X; + alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2282,9 +2291,17 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; } - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; + if(1 == pAsm->D.dst.predicated) + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + } // dst if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || @@ -2323,7 +2340,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; + alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; if (pAsm->D.dst.op3) { @@ -2436,253 +2453,6 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) -{ - GLuint number_of_scalar_operations; - GLboolean is_single_scalar_operation; - GLuint scalar_channel_index; - - PVSSRC * pcurrent_source; - int current_source_index; - GLuint contiguous_slots_needed; - - GLuint uNumSrc = r700GetNumOperands(pAsm); - - GLboolean bSplitInst = GL_FALSE; - - if (1 == pAsm->D.dst.math) - { - is_single_scalar_operation = GL_TRUE; - number_of_scalar_operations = 1; - } - else - { - is_single_scalar_operation = GL_FALSE; - number_of_scalar_operations = 4; - } - - contiguous_slots_needed = 0; - - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) - { - contiguous_slots_needed = 4; - } - - initialize(pAsm); - - for (scalar_channel_index=0; - scalar_channel_index < number_of_scalar_operations; - scalar_channel_index++) - { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - - //src 0 - current_source_index = 0; - pcurrent_source = &(pAsm->S[0].src); - - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - - if (uNumSrc > 1) - { - // Process source 1 - current_source_index = 1; - pcurrent_source = &(pAsm->S[current_source_index].src); - - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - } - - //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; - - if( (is_single_scalar_operation == GL_TRUE) - || (GL_TRUE == bSplitInst) ) - { - alu_instruction_ptr->m_Word0.f.last = 1; - } - else - { - alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; - } - - alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; - if(1 == pAsm->D.dst.predicated) - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; - } - - // dst - if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || - (pAsm->D.dst.rtype == DST_REG_OUT) ) - { - alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; - } - else - { - radeon_error("Only temp destination registers supported for ALU dest regs.\n"); - return GL_FALSE; - } - - alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype - - if ( is_single_scalar_operation == GL_TRUE ) - { - // Override scalar_channel_index since only one scalar value will be written - if(pAsm->D.dst.writex) - { - scalar_channel_index = 0; - } - else if(pAsm->D.dst.writey) - { - scalar_channel_index = 1; - } - else if(pAsm->D.dst.writez) - { - scalar_channel_index = 2; - } - else if(pAsm->D.dst.writew) - { - scalar_channel_index = 3; - } - } - - alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - - alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; - - if (pAsm->D.dst.op3) - { - //op3 - - alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; - - //There's 3rd src for op3 - current_source_index = 2; - pcurrent_source = &(pAsm->S[current_source_index].src); - - if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - } - else - { - //op2 - if (pAsm->bR6xx) - { - alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; - - //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; - //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; - switch (scalar_channel_index) - { - case 0: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; - break; - case 1: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; - break; - case 2: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; - break; - case 3: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; - break; - default: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK; - break; - } - alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; - - //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; - //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - switch (scalar_channel_index) - { - case 0: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; - break; - case 1: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; - break; - case 2: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; - break; - case 3: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; - break; - default: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK; - break; - } - alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; - } - } - - if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { - return GL_FALSE; - } - - /* - * Judge the type of current instruction, is it vector or scalar - * instruction. - */ - if (is_single_scalar_operation) - { - if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - else - { - if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - - contiguous_slots_needed = 0; - } - - return GL_TRUE; -} - GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) { R700ALUInstruction * alu_instruction_ptr; @@ -2987,44 +2757,6 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) pAsm->S[2].bits = 0; pAsm->is_tex = GL_FALSE; pAsm->need_tex_barrier = GL_FALSE; - - return GL_TRUE; -} - -GLboolean next_ins2(r700_AssemblerBase *pAsm) -{ - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - //ALU - if( GL_FALSE == assemble_alu_instruction2(pAsm) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - - if(pAsm->D.dst.rtype == DST_REG_OUT) - { - if(pAsm->D.dst.op3) - { - // There is no mask for OP3 instructions, so all channels are written - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; - } - else - { - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] - |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; - } - } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->D2.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; - pAsm->D2.bits = 0; return GL_TRUE; @@ -4537,7 +4269,7 @@ GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) pAsm->S[1].src.swizzlez = SQ_SEL_0; pAsm->S[1].src.swizzlew = SQ_SEL_0; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -5683,6 +5415,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->D.dst.predicated = 0; /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */ + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ #if 0 pAsm->S[0].src.rtype = SRC_REC_LITERAL; //pAsm->S[0].src.reg = 0; @@ -5707,7 +5440,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->S[0].src.swizzlez = flagValue; pAsm->S[0].src.swizzlew = flagValue; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -5735,6 +5468,7 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) pAsm->D2.dst2.literal = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 1; + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ pAsm->S[0].src.rtype = DST_REG_TEMPORARY; pAsm->S[0].src.reg = pAsm->flag_reg_index; @@ -5768,7 +5502,7 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) pAsm->S[1].src.swizzlez = SQ_SEL_1; pAsm->S[1].src.swizzlew = SQ_SEL_1; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index cfa2610a55..cb7685464d 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -116,6 +116,7 @@ typedef struct PVSINSTtag { BITS literal :2; BITS SaturateMode :2; + BITS index_mode :3; } PVSINST; typedef struct PVSSRCtag @@ -529,10 +530,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); GLboolean next_ins(r700_AssemblerBase *pAsm); -GLboolean next_ins2(r700_AssemblerBase *pAsm); -GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm); - -/* TODO : merge next_ins/2/literal, assemble_alu_instruction/2/literal */ +/* TODO : merge next_ins/literal, assemble_alu_instruction/literal */ GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); -- cgit v1.2.3 From 4e86cedf5b7ab98dbe59115fc325f9b3172d58be Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 7 Dec 2009 15:23:40 +0200 Subject: r600: add assembler support for literal(inline) constants and use it in cubemap instruction sequence for testing --- src/mesa/drivers/dri/r600/r700_assembler.c | 67 +++++++++++++++++++++--------- src/mesa/drivers/dri/r600/r700_assembler.h | 3 +- 2 files changed, 49 insertions(+), 21 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 8155d53eeb..c1e3377af6 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1733,7 +1733,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, } else { - pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2); } // If this clause constains any instruction that is forward dependent on a TEX instruction, @@ -2168,6 +2168,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { + R700ALUInstruction * alu_instruction_ptr; + R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; + R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; + GLuint number_of_scalar_operations; GLboolean is_single_scalar_operation; GLuint scalar_channel_index; @@ -2238,18 +2242,39 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) contiguous_slots_needed = 4; } + contiguous_slots_needed += pAsm->D2.dst2.literal_slots; + initialize(pAsm); for (scalar_channel_index=0; scalar_channel_index < number_of_scalar_operations; scalar_channel_index++) { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); + if(scalar_channel_index == (number_of_scalar_operations-1)) + { + switch(pAsm->D2.dst2.literal_slots) + { + case 0: + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + break; + case 1: + alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); + Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; + break; + case 2: + alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); + Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; + break; + }; + } + else + { + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + } //src 0 current_source_index = 0; @@ -2447,12 +2472,12 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } } - contiguous_slots_needed = 0; + contiguous_slots_needed -= 1; } return GL_TRUE; } - +#if 0 GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) { R700ALUInstruction * alu_instruction_ptr; @@ -2705,7 +2730,7 @@ GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * p return GL_TRUE; } - +#endif GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); @@ -2758,11 +2783,11 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) pAsm->is_tex = GL_FALSE; pAsm->need_tex_barrier = GL_FALSE; pAsm->D2.bits = 0; - + pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0; return GL_TRUE; } -/* not work yet */ +#if 0/* not work yet */ GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); @@ -2784,7 +2809,7 @@ GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) pAsm->need_tex_barrier = GL_FALSE; return GL_TRUE; } - +#endif GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode) { BITS tmp; @@ -4472,13 +4497,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp - * also no support for imm constants, so add 1 here */ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp2; + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1.5F; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; @@ -4489,12 +4515,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[1].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + /* immediate c 1.5 */ + pAsm->S[2].src.rtype = SRC_REC_LITERAL; pAsm->S[2].src.reg = tmp1; - setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); next_ins(pAsm); - +#if 0 /* ADD the remaining .5 */ pAsm->D.dst.opcode = SQ_OP2_INST_ADD; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4515,7 +4542,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) noswizzle_PVSSRC(&(pAsm->S[1].src)); next_ins(pAsm); - +#endif /* tmp1.xy = temp2.xy */ pAsm->D.dst.opcode = SQ_OP2_INST_MOV; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -5410,7 +5437,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.literal_slots = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 0; /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ @@ -5465,7 +5492,7 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.literal_slots = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 1; pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index cb7685464d..3fe65654ca 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -114,7 +114,7 @@ typedef struct PVSDSTtag typedef struct PVSINSTtag { - BITS literal :2; + BITS literal_slots :2; BITS SaturateMode :2; BITS index_mode :3; } PVSINST; @@ -345,6 +345,7 @@ typedef struct r700_AssemblerBase PVSDWORD D; PVSDWORD D2; PVSDWORD S[3]; + PVSDWORD C[4]; unsigned int uLastPosUpdate; unsigned int last_cond_register; -- cgit v1.2.3 From 2b8b16f6a6ce6091d4939cfb567a65a52757dff0 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 7 Dec 2009 16:09:10 +0200 Subject: r600: use the new inline constants feature to fix COS --- src/mesa/drivers/dri/r600/r700_assembler.c | 37 +++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index c1e3377af6..660410f1ad 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3041,7 +3041,42 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) GLboolean assemble_COS(r700_AssemblerBase *pAsm) { - return assemble_math_function(pAsm, SQ_OP2_INST_COS); + int tmp; + //return assemble_math_function(pAsm, SQ_OP2_INST_COS); + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + assemble_src(pAsm, 0, -1); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; + next_ins(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + + return GL_TRUE; + } GLboolean assemble_DOT(r700_AssemblerBase *pAsm) -- cgit v1.2.3 From fbe06a9c2999a802333f8310156d58045d723799 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 7 Dec 2009 16:23:07 +0200 Subject: r600: fix SIN also --- src/mesa/drivers/dri/r600/r700_assembler.c | 15 +++++---------- src/mesa/drivers/dri/r600/r700_assembler.h | 3 +-- 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 660410f1ad..caccedabdf 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3039,10 +3039,9 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_COS(r700_AssemblerBase *pAsm) +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { int tmp; - //return assemble_math_function(pAsm, SQ_OP2_INST_COS); checkop1(pAsm); tmp = gethelpr(pAsm); @@ -3062,7 +3061,7 @@ GLboolean assemble_COS(r700_AssemblerBase *pAsm) pAsm->C[1].f = 0.0F; next_ins(pAsm); - pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; assemble_dst(pAsm); @@ -3075,6 +3074,7 @@ GLboolean assemble_COS(r700_AssemblerBase *pAsm) next_ins(pAsm); + //TODO - replicate if more channels set in WriteMask return GL_TRUE; } @@ -4192,11 +4192,6 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); } -GLboolean assemble_SIN(r700_AssemblerBase *pAsm) -{ - return assemble_math_function(pAsm, SQ_OP2_INST_SIN); -} - GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { BITS tmp; @@ -5693,7 +5688,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_COS: - if ( GL_FALSE == assemble_COS(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) return GL_FALSE; break; @@ -5790,7 +5785,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_SIN: - if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) return GL_FALSE; break; case OPCODE_SCS: diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 3fe65654ca..f83206b726 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -548,7 +548,6 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm); GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); -GLboolean assemble_COS(r700_AssemblerBase *pAsm); GLboolean assemble_DOT(r700_AssemblerBase *pAsm); GLboolean assemble_DST(r700_AssemblerBase *pAsm); GLboolean assemble_EX2(r700_AssemblerBase *pAsm); @@ -569,12 +568,12 @@ GLboolean assemble_MUL(r700_AssemblerBase *pAsm); GLboolean assemble_POW(r700_AssemblerBase *pAsm); GLboolean assemble_RCP(r700_AssemblerBase *pAsm); GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); -GLboolean assemble_SIN(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode); GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode); GLboolean assemble_SLT(r700_AssemblerBase *pAsm); GLboolean assemble_STP(r700_AssemblerBase *pAsm); -- cgit v1.2.3 From 0f854105f5a430ab36281c9bed530eccb8b8f44c Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 7 Dec 2009 16:27:05 +0200 Subject: r600: remove (now) dead code --- src/mesa/drivers/dri/r600/r700_assembler.c | 301 +---------------------------- src/mesa/drivers/dri/r600/r700_assembler.h | 4 - 2 files changed, 2 insertions(+), 303 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index caccedabdf..dd1199756d 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2477,260 +2477,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) return GL_TRUE; } -#if 0 -GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) -{ - R700ALUInstruction * alu_instruction_ptr; - R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; - R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; - - GLuint number_of_scalar_operations; - GLboolean is_single_scalar_operation; - GLuint scalar_channel_index; - - GLuint contiguous_slots_needed; - GLuint lastInstruction; - GLuint not_masked[4]; - - GLuint uNumSrc = r700GetNumOperands(pAsm); - - GLboolean bSplitInst = GL_FALSE; - - number_of_scalar_operations = 0; - contiguous_slots_needed = 0; - - if(1 == pAsm->D.dst.writew) - { - lastInstruction = 3; - number_of_scalar_operations++; - not_masked[3] = 1; - } - else - { - not_masked[3] = 0; - } - if(1 == pAsm->D.dst.writez) - { - lastInstruction = 2; - number_of_scalar_operations++; - not_masked[2] = 1; - } - else - { - not_masked[2] = 0; - } - if(1 == pAsm->D.dst.writey) - { - lastInstruction = 1; - number_of_scalar_operations++; - not_masked[1] = 1; - } - else - { - not_masked[1] = 0; - } - if(1 == pAsm->D.dst.writex) - { - lastInstruction = 0; - number_of_scalar_operations++; - not_masked[0] = 1; - } - else - { - not_masked[0] = 0; - } - - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) - { - contiguous_slots_needed = 4; - } - else - { - contiguous_slots_needed = number_of_scalar_operations; - } - - if(1 == pAsm->D2.dst2.literal) - { - contiguous_slots_needed += 1; - } - else if(2 == pAsm->D2.dst2.literal) - { - contiguous_slots_needed += 2; - } - - initialize(pAsm); - - for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++) - { - if(0 == not_masked[scalar_channel_index]) - { - continue; - } - - if(scalar_channel_index == lastInstruction) - { - switch (pAsm->D2.dst2.literal) - { - case 0: - alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - break; - case 1: - alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); - if (alu_instruction_ptr_hl == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]); - alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; - break; - case 2: - alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); - if (alu_instruction_ptr_fl == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]); - alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; - break; - default: - break; - }; - } - else - { - alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - } - - //src 0 - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 0, - &(pAsm->S[0].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - - if (uNumSrc > 1) - { - // Process source 1 - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 1, - &(pAsm->S[1].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - } - - //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; - - if(scalar_channel_index == lastInstruction) - { - alu_instruction_ptr->m_Word0.f.last = 1; - } - - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - if(1 == pAsm->D.dst.predicated) - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0; - } - - // dst - if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || - (pAsm->D.dst.rtype == DST_REG_OUT) ) - { - alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; - } - else - { - radeon_error("Only temp destination registers supported for ALU dest regs.\n"); - return GL_FALSE; - } - - alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype - - alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - - alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; - - if (pAsm->D.dst.op3) - { - //op3 - alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; - - //There's 3rd src for op3 - if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 2, - &(pAsm->S[2].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - } - else - { - //op2 - if (pAsm->bR6xx) - { - alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; - alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; - alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; - } - } - - if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { - return GL_FALSE; - } - - if (1 == number_of_scalar_operations) - { - if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - else - { - if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - contiguous_slots_needed -= 2; - } - - return GL_TRUE; -} -#endif GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); @@ -2787,29 +2534,6 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) return GL_TRUE; } -#if 0/* not work yet */ -GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) -{ - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - //ALU - if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->D2.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; - return GL_TRUE; -} -#endif GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode) { BITS tmp; @@ -4533,8 +4257,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp2; - pAsm->D2.dst2.literal_slots = 1; - pAsm->C[0].f = 1.5F; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; @@ -4546,33 +4268,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); /* immediate c 1.5 */ + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1.5F; pAsm->S[2].src.rtype = SRC_REC_LITERAL; pAsm->S[2].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); next_ins(pAsm); -#if 0 - /* ADD the remaining .5 */ - pAsm->D.dst.opcode = SQ_OP2_INST_ADD; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp2; - pAsm->D.dst.writex = 1; - pAsm->D.dst.writey = 1; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp2; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 - noswizzle_PVSSRC(&(pAsm->S[1].src)); - - next_ins(pAsm); -#endif /* tmp1.xy = temp2.xy */ pAsm->D.dst.opcode = SQ_OP2_INST_MOV; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index f83206b726..6dc44017eb 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -531,10 +531,6 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); GLboolean next_ins(r700_AssemblerBase *pAsm); -/* TODO : merge next_ins/literal, assemble_alu_instruction/literal */ -GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); -GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); - GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops); GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset); GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue); -- cgit v1.2.3 From 629a648b059d8a2653b6a9cdf7f460533de0e1da Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 7 Dec 2009 17:22:03 +0200 Subject: r600: and finally fix SCS --- src/mesa/drivers/dri/r600/r700_assembler.c | 97 ++++++++++++++---------------- 1 file changed, 46 insertions(+), 51 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index dd1199756d..aed84fc3bd 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2237,7 +2237,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) contiguous_slots_needed = 0; - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + if(!is_single_scalar_operation) { contiguous_slots_needed = 4; } @@ -3920,68 +3920,63 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { BITS tmp; - checkop1(pAsm); - - tmp = gethelpr(pAsm); - - // COS tmp.x, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_COS; - pAsm->D.dst.math = 1; + checkop1(pAsm); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; + tmp = gethelpr(pAsm); + /* tmp.x = src /2*PI */ + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + assemble_src(pAsm, 0, -1); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; - // SIN tmp.y, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_SIN; - pAsm->D.dst.math = 1; + next_ins(pAsm); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writey = 1; + // COS dst.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + assemble_dst(pAsm); + /* mask y */ + pAsm->D.dst.writey = 0; - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - // MOV dst.mask, tmp - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + // SIN dst.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = DST_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + assemble_dst(pAsm); + /* mask x */ + pAsm->D.dst.writex = 0; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlez = SQ_SEL_0; - pAsm->S[0].src.swizzlew = SQ_SEL_0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } -- cgit v1.2.3 From 8927b72118f9433aafd0e811cfc1981215eb3c5f Mon Sep 17 00:00:00 2001 From: Richard Li Date: Wed, 9 Dec 2009 15:39:16 -0500 Subject: r600 : add pre-compile mesa shader calling interface, in order to handle complex built-in shader instructions. --- src/mesa/drivers/dri/r600/r700_assembler.c | 407 +++++++++++++++++++++++++++-- src/mesa/drivers/dri/r600/r700_assembler.h | 65 ++++- src/mesa/drivers/dri/r600/r700_fragprog.c | 25 +- src/mesa/drivers/dri/r600/r700_vertprog.c | 25 +- 4 files changed, 498 insertions(+), 24 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index aed84fc3bd..e84f524525 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -32,6 +32,7 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "shader/prog_parameter.h" #include "radeon_debug.h" #include "r600_context.h" @@ -41,6 +42,39 @@ #define USE_CF_FOR_CONTINUE_BREAK 1 #define USE_CF_FOR_POP_AFTER 1 +struct prog_instruction noise1_insts[12] = { + {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0} +}; +float noise1_const[2][4] = { + {0.300000f, 0.900000f, 0.500000f, 0.300000f} +}; + +COMPILED_SUB noise1_presub = { + &(noise1_insts[0]), + 12, + 2, + 1, + 0, + &(noise1_const[0]), + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + {0,0,0}, + 0 +}; + BITS addrmode_PVSDST(PVSDST * pPVSDST) { return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); @@ -330,14 +364,14 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) return(format); } -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) { - if(pAsm->D.dst.op3) + if(nIsOp3 > 0) { return 3; } - switch (pAsm->D.dst.opcode) + switch (opcode) { case SQ_OP2_INST_ADD: case SQ_OP2_INST_KILLE: @@ -378,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 1; default: radeon_error( - "Need instruction operand number for %x.\n", pAsm->D.dst.opcode); + "Need instruction operand number for %x.\n", opcode); }; return 3; @@ -500,6 +534,11 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->unCFflags = 0; + pAsm->presubs = NULL; + pAsm->unPresubArraySize = 0; + pAsm->unNumPresub = 0; + pAsm->unCurNumILInsts = 0; + return 0; } @@ -2010,7 +2049,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; srcD.dst.opcode, pAsm->D.dst.op3); for (src=0; srcD.dst.opcode, pAsm->D.dst.op3); //GLuint channel_swizzle, j; //GLuint chan_counter[4] = {0, 0, 0, 0}; //PVSSRC * pSource[3]; @@ -4968,7 +5007,7 @@ void add_return_inst(r700_AssemblerBase *pAsm) pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; } -GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift) { /* Put in sub */ if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize ) @@ -4983,7 +5022,7 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) pAsm->unSubArraySize += 10; } - pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex; + pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; @@ -5074,9 +5113,13 @@ GLboolean assemble_RET(r700_AssemblerBase *pAsm) GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLint nILindex, + GLuint uiIL_Shift, GLuint uiNumberInsts, - struct prog_instruction *pILInst) + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc) { + GLint uiIL_Offset; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; if(GL_FALSE == add_cf_instruction(pAsm) ) @@ -5109,8 +5152,12 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, pAsm->unCallerArraySize += 10; } - pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex; - pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + uiIL_Offset = nILindex + uiIL_Shift; + pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; + pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + + pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL; + pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; pAsm->unCallerArrayPointer++; @@ -5120,7 +5167,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLboolean bRet; for(j=0; junSubArrayPointer; j++) { - if(nILindex == pAsm->subs[j].subIL_Offset) + if(uiIL_Offset == pAsm->subs[j].subIL_Offset) { /* compiled before */ max = pAsm->subs[j].unStackDepthMax @@ -5138,7 +5185,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; unSubID = pAsm->unSubArrayPointer; - bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm); + bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm); if(GL_TRUE == bRet) { @@ -5148,6 +5195,8 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, { pAsm->CALLSTACK[pAsm->CALLSP].max = max; } + + pAsm->subs[unSubID].pPresubDesc = pPresubDesc; } return bRet; @@ -5313,6 +5362,7 @@ GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) } GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode) @@ -5468,6 +5518,26 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_MUL: if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) return GL_FALSE; + break; + + case OPCODE_NOISE1: + { + callPreSub(pR700AsmCode, + GLSL_NOISE1, + &noise1_presub, + pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, + 1); + radeon_error("noise1: not yet supported shader instruction\n"); + }; + break; + case OPCODE_NOISE2: + radeon_error("noise2: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE3: + radeon_error("noise3: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE4: + radeon_error("noise4: not yet supported shader instruction\n"); break; case OPCODE_POW: @@ -5653,7 +5723,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_BGNSUB: - if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) ) + if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) ) { return GL_FALSE; } @@ -5668,9 +5738,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_CAL: if( GL_FALSE == assemble_CAL(pR700AsmCode, - pILInst[i].BranchTarget, + pILInst[i].BranchTarget, + uiIL_Shift, uiNumberInsts, - pILInst) ) + pILInst, + NULL) ) { return GL_FALSE; } @@ -5707,7 +5779,7 @@ GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) return GL_TRUE; } -GLboolean RelocProgram(r700_AssemblerBase * pAsm) +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) { GLuint i; GLuint unCFoffset; @@ -5717,6 +5789,12 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) R700ShaderInstruction * pInst; R700ControlFlowGenericClause * pCFInst; + R700ControlFlowALUClause * pCF_ALU; + R700ALUInstruction * pALU; + GLuint unConstOffset = 0; + GLuint unRegOffset; + GLuint unMinRegIndex; + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; /* remove flags init if they are not used */ @@ -5762,6 +5840,11 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) unCFoffset = plstCFmain->uNumOfNode; + if(NULL != pILProg->Parameters) + { + unConstOffset = pILProg->Parameters->NumParameters; + } + /* Reloc subs */ for(i=0; iunSubArrayPointer; i++) { @@ -5799,6 +5882,84 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) pInst = pInst->pNextInst; }; + if(NULL != pAsm->subs[i].pPresubDesc) + { + GLuint uNumSrc; + + unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg; + unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart; + + pInst = plstCFsub->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + + if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src0_sel += unConstOffset; + } + + if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) + >= SQ_OP3_INST_MUL_LIT ) + { /* op3 : 3 srcs */ + if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word1_OP3.f.src2_sel += unConstOffset; + } + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + else + { + if(pAsm->bR6xx) + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + } + else + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + } + if(2 == uNumSrc) + { /* 2 srcs */ + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + } + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + pInst = pInst->pNextInst; + }; + } + /* Put sub into main */ plstCFmain->pTail->pNextInst = plstCFsub->pHead; plstCFmain->pTail = plstCFsub->pTail; @@ -5812,11 +5973,216 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) { pAsm->callers[i].cf_ptr->m_Word0.f.addr = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; + + if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc) + { + unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg; + + if(NULL != pAsm->callers[i].prelude_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + if(NULL != pAsm->callers[i].finale_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + } } return GL_TRUE; } +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc) +{ + /* save assemble context */ + GLuint starting_temp_register_number_save; + GLuint number_used_registers_save; + GLuint uFirstHelpReg_save; + GLuint uHelpReg_save; + GLuint uiCurInst_save; + struct prog_instruction *pILInst_save; + PRESUB_DESC * pPresubDesc; + GLboolean bRet; + int i; + + R700ControlFlowGenericClause* prelude_cf_ptr = NULL; + + /* copy srcs to presub inputs */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + for(i=0; iD.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i]; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, i, 0) ) + { + return GL_FALSE; + } + + next_ins(pAsm); + } + if(uNumValidSrc > 0) + { + prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + /* browse thro existing presubs. */ + for(i=0; iunNumPresub; i++) + { + if(pAsm->presubs[i].sptSigniture == scriptSigniture) + { + break; + } + } + + if(i == pAsm->unNumPresub) + { /* not loaded yet */ + /* save assemble context */ + number_used_registers_save = pAsm->number_used_registers; + uFirstHelpReg_save = pAsm->uFirstHelpReg; + uHelpReg_save = pAsm->uHelpReg; + starting_temp_register_number_save = pAsm->starting_temp_register_number; + pILInst_save = pAsm->pILInst; + uiCurInst_save = pAsm->uiCurInst; + + /* alloc in presub */ + if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize ) + { + pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs, + sizeof(PRESUB_DESC) * pAsm->unPresubArraySize, + sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) ); + if(NULL == pAsm->presubs) + { + radeon_error("No memeory to allocate built in shader function description structures. \n"); + return GL_FALSE; + } + pAsm->unPresubArraySize += 4; + } + + pPresubDesc = &(pAsm->presubs[i]); + pPresubDesc->sptSigniture = scriptSigniture; + + /* constants offsets need to be final resolved at reloc. */ + if(0 == pAsm->unNumPresub) + { + pPresubDesc->unConstantsStart = 0; + } + else + { + pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart + + pAsm->presubs[i-1].pCompiledSub->NumParameters; + } + + pPresubDesc->pCompiledSub = pCompiledSub; + + pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts; + pPresubDesc->maxStartReg = uFirstHelpReg_save; + pAsm->unCurNumILInsts += pCompiledSub->NumInstructions; + + pAsm->unNumPresub++; + + /* setup new assemble context */ + pAsm->starting_temp_register_number = 0; + pAsm->number_used_registers = pCompiledSub->NumTemporaries; + pAsm->uFirstHelpReg = pAsm->number_used_registers; + pAsm->uHelpReg = pAsm->uFirstHelpReg; + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + + + pPresubDesc->number_used_registers = pAsm->number_used_registers; + + /* restore assemble context */ + pAsm->number_used_registers = number_used_registers_save; + pAsm->uFirstHelpReg = uFirstHelpReg_save; + pAsm->uHelpReg = uHelpReg_save; + pAsm->starting_temp_register_number = starting_temp_register_number_save; + pAsm->pILInst = pILInst_save; + pAsm->uiCurInst = uiCurInst_save; + } + else + { /* was loaded */ + pPresubDesc = &(pAsm->presubs[i]); + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + } + + if(GL_FALSE == bRet) + { + radeon_error("Shader presub assemble failed. \n"); + } + else + { + /* copy presub output to real dst */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pCompiledSub->dstRegIndex; + pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX; + pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY; + pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ; + pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW; + + next_ins(pAsm); + + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers ) + { + pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg; + } + if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg) + { + pPresubDesc->maxStartReg = pAsm->uFirstHelpReg; + } + + return bRet; +} + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -6174,6 +6540,11 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) FREE(pR700AsmCode->callers); } + if(NULL != pR700AsmCode->presubs) + { + FREE(pR700AsmCode->presubs); + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 6dc44017eb..6ef945dfda 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -34,6 +34,45 @@ #include "r700_shaderinst.h" #include "r700_shader.h" +typedef enum LOADABLE_SCRIPT_SIGNITURE +{ + GLSL_NOISE1 = 0x10000001, + GLSL_NOISE2 = 0x10000002, + GLSL_NOISE3 = 0x10000003, + GLSL_NOISE4 = 0x10000004 +}LOADABLE_SCRIPT_SIGNITURE; + +typedef struct COMPILED_SUB +{ + struct prog_instruction *Instructions; + GLuint NumInstructions; + GLuint NumTemporaries; + GLuint NumParameters; + GLuint MinRegIndex; + GLfloat (*ParameterValues)[4]; + GLbyte outputSwizzleX; + GLbyte outputSwizzleY; + GLbyte outputSwizzleZ; + GLbyte outputSwizzleW; + GLshort srcRegIndex[3]; + GLushort dstRegIndex; +}COMPILED_SUB; + +typedef struct PRESUB_DESCtag +{ + LOADABLE_SCRIPT_SIGNITURE sptSigniture; + GLint subIL_Shift; + struct prog_src_register InReg[3]; + struct prog_dst_register OutReg; + + GLushort maxStartReg; + GLushort number_used_registers; + + GLuint unConstantsStart; + + COMPILED_SUB * pCompiledSub; +} PRESUB_DESC; + typedef enum SHADER_PIPE_TYPE { SPT_VP = 0, @@ -296,6 +335,7 @@ typedef struct SUB_OFFSET GLint subIL_Offset; GLuint unCFoffset; GLuint unStackDepthMax; + PRESUB_DESC * pPresubDesc; TypedShaderList lstCFInstructions_local; } SUB_OFFSET; @@ -304,6 +344,9 @@ typedef struct CALLER_POINTER GLint subIL_Offset; GLint subDescIndex; R700ControlFlowGenericClause* cf_ptr; + + R700ControlFlowGenericClause* prelude_cf_ptr; + R700ControlFlowGenericClause* finale_cf_ptr; } CALLER_POINTER; #define SQ_MAX_CALL_DEPTH 0x00000020 @@ -437,6 +480,11 @@ typedef struct r700_AssemblerBase GLuint unCFflags; + PRESUB_DESC * presubs; + GLuint unPresubArraySize; + GLuint unNumPresub; + GLuint unCurNumILInsts; + } r700_AssemblerBase; //Internal use @@ -458,7 +506,7 @@ BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ; GLboolean is_reduction_opcode(PVSDWORD * dest); GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size); -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm); +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3); GLboolean IsTex(gl_inst_opcode Opcode); GLboolean IsAlu(gl_inst_opcode Opcode); @@ -585,13 +633,15 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm); GLboolean assemble_COND(r700_AssemblerBase *pAsm); GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm); -GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex); +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift); GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm); GLboolean assemble_RET(r700_AssemblerBase *pAsm); GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLint nILindex, + GLuint uiIL_Offest, GLuint uiNumberInsts, - struct prog_instruction *pILInst); + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc); GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, @@ -602,16 +652,23 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select); +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + /* struct prog_instruction ** pILInstParent, */ + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc); //Interface GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode); GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); -GLboolean RelocProgram(r700_AssemblerBase * pAsm); +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg); GLboolean InitShaderProgram(r700_AssemblerBase * pAsm); int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 8eb439a951..d15f013710 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -325,7 +325,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, { fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i]; } + + fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions; + if( GL_FALSE == AssembleInstr(0, + 0, mesa_fp->Base.NumInstructions, &(mesa_fp->Base.Instructions[0]), &(fp->r700AsmCode)) ) @@ -338,7 +342,7 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, return GL_FALSE; } - if( GL_FALSE == RelocProgram(&(fp->r700AsmCode)) ) + if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) ) { return GL_FALSE; } @@ -620,6 +624,25 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } else r700->ps.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->ps.num_consts; + for(ui=0; uiunNumPresub; ui++) + { + pCompiledSub = pAsm->presubs[ui].pCompiledSub; + + r700->ps.num_consts += pCompiledSub->NumParameters; + + for(uj=0; ujNumParameters; uj++) + { + r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 759b74dc7e..90fac078ff 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -341,7 +341,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, { vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i]; } + + vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions; + if(GL_FALSE == AssembleInstr(0, + 0, vp->mesa_program->Base.NumInstructions, &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) @@ -354,7 +358,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return NULL; } - if( GL_FALSE == RelocProgram(&(vp->r700AsmCode)) ) + if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) ) { return GL_FALSE; } @@ -671,5 +675,24 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) } else r700->vs.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->vs.num_consts; + for(ui=0; uir700AsmCode.unNumPresub; ui++) + { + pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub; + + r700->vs.num_consts += pCompiledSub->NumParameters; + + for(uj=0; ujNumParameters; uj++) + { + r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } -- cgit v1.2.3 From 18ebcfe39360dc0ef1e175fe6c39cbb857432ab4 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Mon, 14 Dec 2009 18:02:05 -0500 Subject: r600 : add texture support for vertex shader. --- src/mesa/drivers/dri/r600/r700_assembler.c | 98 +++++++++++++++++++---------- src/mesa/drivers/dri/r600/r700_assembler.h | 2 + src/mesa/drivers/dri/r600/r700_chip.c | 34 ++++++++-- src/mesa/drivers/dri/r600/r700_shaderinst.h | 7 +++ 4 files changed, 104 insertions(+), 37 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index e84f524525..d493d4e2b1 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -539,6 +539,8 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->unNumPresub = 0; pAsm->unCurNumILInsts = 0; + pAsm->unVetTexBits = 0; + return 0; } @@ -1412,43 +1414,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; break; case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) + if(SPT_VP == pAsm->currentShaderType) + { + switch (pILInst->SrcReg[0].Index) + { + case VERT_ATTRIB_TEX0: + case VERT_ATTRIB_TEX1: + case VERT_ATTRIB_TEX2: + case VERT_ATTRIB_TEX3: + case VERT_ATTRIB_TEX4: + case VERT_ATTRIB_TEX5: + case VERT_ATTRIB_TEX6: + case VERT_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + } + else { - case FRAG_ATTRIB_WPOS: - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_FOGC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_WPOS: + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + } + + if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { + bValidTexCoord = GL_TRUE; pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; - case FRAG_ATTRIB_FACE: - fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); - break; - case FRAG_ATTRIB_PNTC: - fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); - break; - } - - if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || - (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) - { - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = - pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; + } } - break; + break; } } @@ -1493,8 +1517,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + tex_instruction_ptr->m_Word0.f.alt_const = 0; - tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + if(SPT_VP == pAsm->currentShaderType) + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX; + pAsm->unVetTexBits |= 1 < texture_unit_source->reg; + } + else + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + } tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; if (normalized) { @@ -1513,7 +1546,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word2.f.offset_x = 0x0; tex_instruction_ptr->m_Word2.f.offset_y = 0x0; tex_instruction_ptr->m_Word2.f.offset_z = 0x0; - tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg; // dst diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 6ef945dfda..dbd9860f7d 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -485,6 +485,8 @@ typedef struct r700_AssemblerBase GLuint unNumPresub; GLuint unCurNumILInsts; + GLuint unVetTexBits; + } r700_AssemblerBase; //Internal use diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index ee2a0a4c8a..0b90079c18 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -45,6 +45,9 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + + struct r700_vertex_program *vp = context->selected_vp; + struct radeon_bo *bo = NULL; unsigned int i; BATCH_LOCALS(&context->radeon); @@ -52,7 +55,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; uint32_t offset; if (t) { @@ -71,7 +74,16 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) BEGIN_BATCH_NO_AUTOSTATE(9 + 4); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH(i * 7); + + if( (1r700AsmCode.unVetTexBits ) + { /* vs texture */ + R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + } + else + { + R600_OUT_BATCH(i * 7); + } + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); @@ -95,21 +107,35 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) } } +#define SAMPLER_STRIDE 3 + static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); unsigned int i; + + struct r700_vertex_program *vp = context->selected_vp; + BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; if (t) { BEGIN_BATCH_NO_AUTOSTATE(5); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); - R600_OUT_BATCH(i * 3); + + if( (1r700AsmCode.unVetTexBits ) + { /* vs texture */ + R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1 + } + else + { + R600_OUT_BATCH(i * 3); + } + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.h b/src/mesa/drivers/dri/r600/r700_shaderinst.h index 2829cca0a3..cdb9a570f7 100644 --- a/src/mesa/drivers/dri/r600/r700_shaderinst.h +++ b/src/mesa/drivers/dri/r600/r700_shaderinst.h @@ -42,6 +42,13 @@ #define SQ_FETCH_RESOURCE_VS_OFFSET 0x000000a0 #define SQ_FETCH_RESOURCE_VS_COUNT 0x000000b0 +//richard dec.10 glsl +#define SQ_TEX_SAMPLER_PS_OFFSET 0x00000000 +#define SQ_TEX_SAMPLER_PS_COUNT 0x00000012 +#define SQ_TEX_SAMPLER_VS_OFFSET 0x00000012 +#define SQ_TEX_SAMPLER_VS_COUNT 0x00000012 +//------------------- + #define SHADERINST_TYPEMASK_CF 0x10 #define SHADERINST_TYPEMASK_ALU 0x20 #define SHADERINST_TYPEMASK_TEX 0x40 -- cgit v1.2.3 From 0c046bec8f78f33e7530416e0faa4d127d08e641 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 14 Dec 2009 10:48:36 +0200 Subject: r600: add DDX DDY opcodes --- src/mesa/drivers/dri/r600/r700_assembler.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index d493d4e2b1..43dafd5b8a 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -546,7 +546,8 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 GLboolean IsTex(gl_inst_opcode Opcode) { - if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ) + if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) || + (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) ) { return GL_TRUE; } @@ -4363,13 +4364,20 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } - if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB) + switch(pAsm->pILInst[pAsm->uiCurInst].Opcode) { - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; - } - else - { - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + case OPCODE_DDX: + /* will these need WQM(1) on CF inst ? */ + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H; + break; + case OPCODE_DDY: + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V; + break; + case OPCODE_TXB: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; + break; + default: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; } pAsm->is_tex = GL_TRUE; @@ -5682,7 +5690,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, } } break; - + case OPCODE_DDX: + case OPCODE_DDY: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: -- cgit v1.2.3 From dbc374cd3030d5db2c8f5d9b9405976d7efa458d Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Tue, 15 Dec 2009 10:22:34 +0200 Subject: r600: fix typos for vert-tex at least i think this is how it was meant to work --- src/mesa/drivers/dri/r600/r700_assembler.c | 2 +- src/mesa/drivers/dri/r600/r700_chip.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 43dafd5b8a..e10b23b97f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1523,7 +1523,7 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize if(SPT_VP == pAsm->currentShaderType) { tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX; - pAsm->unVetTexBits |= 1 < texture_unit_source->reg; + pAsm->unVetTexBits |= 1 << texture_unit_source->reg; } else { diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 0b90079c18..c124e02184 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -75,7 +75,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) BEGIN_BATCH_NO_AUTOSTATE(9 + 4); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - if( (1r700AsmCode.unVetTexBits ) + if( (1<r700AsmCode.unVetTexBits ) { /* vs texture */ R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); } @@ -127,7 +127,7 @@ static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *at BEGIN_BATCH_NO_AUTOSTATE(5); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); - if( (1r700AsmCode.unVetTexBits ) + if( (1<r700AsmCode.unVetTexBits ) { /* vs texture */ R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1 } -- cgit v1.2.3 From 69728a2ae28d11e48b87e51dc3ea2fcc040c40fb Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 21 Dec 2009 11:47:12 +0200 Subject: r600: disallow negative offsets for relative addressing for now otherwise for example const[ADDR-3] gets us 253 - ALU_SRC_LITERAL which expects immediate floats to follow and hangs --- src/mesa/drivers/dri/r600/r700_assembler.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index e10b23b97f..51692a11ff 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1262,7 +1262,15 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, } pAsm->S[fld].src.rtype = SRC_REG_CONSTANT; - pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; + if(pILInst->SrcReg[src].Index < 0) + { + WARN_ONCE("Negative register offsets not supported yet!\n"); + pAsm->S[fld].src.reg = 0; + } + else + { + pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; + } break; case PROGRAM_INPUT: setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); -- cgit v1.2.3 From ddd9729bc37f4b1098ef940da6e723743db3ded8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Dec 2009 14:21:07 -0700 Subject: mesa: adjust OPCODE_IF/ELSE BranchTarget fields to point to ELSE/ENDIF instr. This is a little more logical. Suggested in bug report 25654. --- src/mesa/drivers/dri/r600/r700_assembler.c | 4 ++-- src/mesa/shader/prog_execute.c | 10 ++++++++-- src/mesa/shader/slang/slang_emit.c | 22 +++++++++++++++++----- 3 files changed, 27 insertions(+), 9 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 51692a11ff..e464c6191c 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -5717,11 +5717,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; - case OPCODE_IF : + case OPCODE_IF: { GLboolean bHasElse = GL_FALSE; - if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE) + if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE) { bHasElse = GL_TRUE; } diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c index 56d174c6ce..7f034520cd 100644 --- a/src/mesa/shader/prog_execute.c +++ b/src/mesa/shader/prog_execute.c @@ -910,6 +910,10 @@ _mesa_execute_program(GLcontext * ctx, case OPCODE_IF: { GLboolean cond; + ASSERT(program->Instructions[inst->BranchTarget].Opcode + == OPCODE_ELSE || + program->Instructions[inst->BranchTarget].Opcode + == OPCODE_ENDIF); /* eval condition */ if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { GLfloat a[4]; @@ -929,14 +933,16 @@ _mesa_execute_program(GLcontext * ctx, else { /* go to the instruction after ELSE or ENDIF */ assert(inst->BranchTarget >= 0); - pc = inst->BranchTarget - 1; + pc = inst->BranchTarget; } } break; case OPCODE_ELSE: /* goto ENDIF */ + ASSERT(program->Instructions[inst->BranchTarget].Opcode + == OPCODE_ENDIF); assert(inst->BranchTarget >= 0); - pc = inst->BranchTarget - 1; + pc = inst->BranchTarget; break; case OPCODE_ENDIF: /* nothing */ diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index e769d0d3ad..ce3f6ab7ea 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -1728,6 +1728,7 @@ emit_if(slang_emit_info *emitInfo, slang_ir_node *n) if (!inst) { return NULL; } + prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions - 1; } else { /* jump to endif instruction */ @@ -1737,8 +1738,8 @@ emit_if(slang_emit_info *emitInfo, slang_ir_node *n) } inst_comment(inst, "else"); inst->DstReg.CondMask = COND_TR; /* always branch */ + prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions; } - prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions; emit(emitInfo, n->Children[2]); } else { @@ -1753,8 +1754,14 @@ emit_if(slang_emit_info *emitInfo, slang_ir_node *n) } } - if (n->Children[2]) { - prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions; + if (elseInstLoc) { + /* point ELSE instruction BranchTarget at ENDIF */ + if (emitInfo->EmitHighLevelInstructions) { + prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions - 1; + } + else { + prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions; + } } return NULL; } @@ -1824,7 +1831,12 @@ emit_loop(slang_emit_info *emitInfo, slang_ir_node *n) assert(inst->Opcode == OPCODE_BRK || inst->Opcode == OPCODE_BRA); /* go to instruction at end of loop */ - inst->BranchTarget = endInstLoc; + if (emitInfo->EmitHighLevelInstructions) { + inst->BranchTarget = endInstLoc; + } + else { + inst->BranchTarget = endInstLoc + 1; + } } else { assert(ir->Opcode == IR_CONT || @@ -1942,7 +1954,7 @@ emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n) } emitInfo->prog->Instructions[ifInstLoc].BranchTarget - = emitInfo->prog->NumInstructions; + = emitInfo->prog->NumInstructions - 1; return inst; } } -- cgit v1.2.3 From e04a818606f5639d9d8df4ebe501abb496bf847f Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Tue, 29 Dec 2009 14:47:01 +0200 Subject: r600: support vertex_array_bgra Use vertex program key mechanism and swizzle during vertex fetch - is there a better way? --- src/mesa/drivers/dri/r600/r600_context.h | 1 + src/mesa/drivers/dri/r600/r700_assembler.c | 20 ++++++++++++++++---- src/mesa/drivers/dri/r600/r700_assembler.h | 1 + src/mesa/drivers/dri/r600/r700_vertprog.c | 8 ++++++-- src/mesa/drivers/dri/r600/r700_vertprog.h | 1 + 5 files changed, 25 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index 394fd757d4..94662ab547 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -108,6 +108,7 @@ typedef struct StreamDesc GLint size; //number of data element GLenum type; //data element type GLsizei stride; + GLenum format; // GL_RGBA,GLBGRA struct radeon_bo *bo; GLint bo_offset; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index e464c6191c..1ff89e18ea 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -891,6 +891,7 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, GLubyte element, GLuint _signed, GLboolean normalize, + GLenum format, VTX_FETCH_METHOD * pFetchMethod) { GLuint client_size_inbyte; @@ -939,10 +940,21 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; - vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; - vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; - vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; - vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + if(format == GL_BGRA) + { + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + } + else + { + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + } vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; vfetch_instruction_ptr->m_Word1.f.data_format = data_format; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index dbd9860f7d..86342b814f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -531,6 +531,7 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, GLubyte element, GLuint _signed, GLboolean normalize, + GLenum format, VTX_FETCH_METHOD * pFetchMethod); GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm); GLuint gethelpr(r700_AssemblerBase* pAsm); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 90fac078ff..782f151f5a 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -188,7 +188,8 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions2( context->stream_desc[i].size, context->stream_desc[i].element, context->stream_desc[i]._signed, - context->stream_desc[i].normalize, + context->stream_desc[i].normalize, + context->stream_desc[i].format, &vtxFetchMethod); } @@ -319,6 +320,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, vp->aos_desc[i].size = context->stream_desc[i].size; vp->aos_desc[i].stride = context->stream_desc[i].stride; vp->aos_desc[i].type = context->stream_desc[i].type; + vp->aos_desc[i].format = context->stream_desc[i].format; } if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) @@ -395,7 +397,8 @@ void r700SelectVertexShader(GLcontext *ctx) match = GL_TRUE; for(i=0; inNumActiveAos; i++) { - if (vp->aos_desc[i].size != context->stream_desc[i].size) + if (vp->aos_desc[i].size != context->stream_desc[i].size || + vp->aos_desc[i].format != context->stream_desc[i].format) { match = GL_FALSE; break; @@ -498,6 +501,7 @@ static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const s pStreamDesc->size = input->Size; pStreamDesc->dst_loc = context->nNumActiveAos; pStreamDesc->element = unLoc; + pStreamDesc->format = input->Format; switch (pStreamDesc->type) { //GetSurfaceFormat diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index 00824c29d3..645c9ac84a 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -39,6 +39,7 @@ typedef struct ArrayDesc //TEMP GLint size; //number of data element GLenum type; //data element type GLsizei stride; + GLenum format; //GL_RGBA or GL_BGRA } ArrayDesc; struct r700_vertex_program -- cgit v1.2.3 From 750c1e7bb4b5caab699d8bce8906b5ca138eac51 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Tue, 5 Jan 2010 13:44:06 +0200 Subject: r600: support depth compare functions & shadow_ambient --- src/mesa/drivers/dri/r600/r600_context.c | 2 ++ src/mesa/drivers/dri/r600/r600_texstate.c | 34 +++++++++++++++++++++++++ src/mesa/drivers/dri/r600/r700_assembler.c | 40 +++++++++++++++++++++++++++++- src/mesa/drivers/dri/r600/r700_assembler.h | 2 ++ src/mesa/drivers/dri/r600/r700_fragprog.c | 22 +++++++++++++++- 5 files changed, 98 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 45bbc3c071..8f6264aaa2 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -99,6 +99,7 @@ static const struct dri_extension card_extensions[] = { {"GL_ARB_depth_clamp", NULL}, {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_fragment_program_shadow", NULL}, {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, @@ -163,6 +164,7 @@ static const struct dri_extension gl_20_extension[] = { #else {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, #endif /* R600_ENABLE_GLSL_TEST */ + {NULL, NULL} }; static const struct tnl_pipeline_stage *r600_pipeline[] = { diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 2a4a6e6ee1..937f127e7c 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -626,6 +626,31 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa return GL_TRUE; } +static GLuint r600_translate_shadow_func(GLenum func) +{ + switch (func) { + case GL_NEVER: + return SQ_TEX_DEPTH_COMPARE_NEVER; + case GL_LESS: + return SQ_TEX_DEPTH_COMPARE_LESS; + case GL_LEQUAL: + return SQ_TEX_DEPTH_COMPARE_LESSEQUAL; + case GL_GREATER: + return SQ_TEX_DEPTH_COMPARE_GREATER; + case GL_GEQUAL: + return SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; + case GL_NOTEQUAL: + return SQ_TEX_DEPTH_COMPARE_NOTEQUAL; + case GL_EQUAL: + return SQ_TEX_DEPTH_COMPARE_EQUAL; + case GL_ALWAYS: + return SQ_TEX_DEPTH_COMPARE_ALWAYS; + default: + WARN_ONCE("Unknown shadow compare function! %d", func); + return 0; + } +} + void r600SetDepthTexMode(struct gl_texture_object *tObj) { radeonTexObjPtr t; @@ -711,6 +736,15 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask); } + if(texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) + { + SETfield(t->SQ_TEX_SAMPLER0, r600_translate_shadow_func(texObj->CompareFunc), DEPTH_COMPARE_FUNCTION_shift, DEPTH_COMPARE_FUNCTION_mask); + } + else + { + CLEARfield(t->SQ_TEX_SAMPLER0, DEPTH_COMPARE_FUNCTION_mask); + } + } /** diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 1ff89e18ea..0ff16b4ddd 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4397,7 +4397,10 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; break; default: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C; + else + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; } pAsm->is_tex = GL_TRUE; @@ -4443,11 +4446,46 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[0].src.swizzlew = SQ_SEL_Y; } + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + { + /* compare value goes to w chan ? */ + pAsm->S[0].src.swizzlew = SQ_SEL_Z; + } + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } + /* add ARB shadow ambient but clamp to 0..1 */ + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + { + /* ADD_SAT dst, dst, ambient[texunit] */ + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + pAsm->D2.dst2.SaturateMode = 1; + + pAsm->S[0].src.rtype = pAsm->D.dst.rtype; + pAsm->S[0].src.reg = pAsm->D.dst.reg; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_CONSTANT; + pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; + noswizzle_PVSSRC(&(pAsm->S[1].src)); + noneg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 86342b814f..56baf5b0d9 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -487,6 +487,8 @@ typedef struct r700_AssemblerBase GLuint unVetTexBits; + GLuint shadow_regs[R700_MAX_TEXTURE_UNITS]; + } r700_AssemblerBase; //Internal use diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index ce2d9fdf79..84d51e6606 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -362,8 +362,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, { GLuint number_of_colors_exported; GLboolean z_enabled = GL_FALSE; - GLuint unBit; + GLuint unBit, shadow_unit; int i; + struct prog_instruction *inst; + gl_state_index shadow_ambient[STATE_LENGTH] + = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0}; //Init_Program Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); @@ -373,6 +376,23 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, insert_wpos_code(ctx, mesa_fp); } + /* add/map consts for ARB_shadow_ambient */ + if(mesa_fp->Base.ShadowSamplers) + { + inst = mesa_fp->Base.Instructions; + for (i = 0; i < mesa_fp->Base.NumInstructions; i++) + { + if(inst->TexShadow == 1) + { + shadow_unit = inst->TexSrcUnit; + shadow_ambient[2] = shadow_unit; + fp->r700AsmCode.shadow_regs[shadow_unit] = + _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient); + } + inst++; + } + } + Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) ) -- cgit v1.2.3