From 66a5562ce2906fbf5b96d1cee18f9a31a78c4360 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 23:49:50 +1000 Subject: r300: fix swtcl texrect path properly. We really need to update the shader state so the texrect parameters work. This should fix compiz looking crappy on rs480 and rs690 --- src/mesa/drivers/dri/r300/r300_state.c | 3 ++- src/mesa/drivers/dri/r300/r300_swtcl.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 10002e3c4f..dbe1f6952e 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2323,10 +2323,11 @@ void r300UpdateShaders(r300ContextPtr rmesa) hw_tcl_on = future_hw_tcl_on = 0; r300ResetHwState(rmesa); + r300UpdateStateParameters(ctx, _NEW_PROGRAM); return; } - r300UpdateStateParameters(ctx, _NEW_PROGRAM); } + r300UpdateStateParameters(ctx, _NEW_PROGRAM); } static void r300SetupPixelShader(r300ContextPtr rmesa) diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index a41fa1023a..8aebd9be3e 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -575,6 +575,7 @@ static void r300RenderStart(GLcontext *ctx) r300ChooseRenderState(ctx); r300SetVertexFormat(ctx); + r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); -- cgit v1.2.3 From 1f420b008bd4bc7b5fe7809e7f7506ef5dcb7209 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 16:41:07 +1000 Subject: r500: make sure we emit max temp atom. We don't appear to update max_temp_idx yet anywhere though --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 ++++ src/mesa/drivers/dri/r300/r300_context.h | 5 +++++ src/mesa/drivers/dri/r300/r300_state.c | 3 +++ 3 files changed, 12 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 3f9d9da399..75f8910c3e 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -412,6 +412,10 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); if (is_r500) { + ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2); + r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; + ALLOC_STATE(r500fp, r500fp, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); ALLOC_STATE(r500fp_const, r500fp_const, R300_FPP_CMDSIZE, 0); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index bb5f5c35f0..980a26ffdd 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -354,6 +354,11 @@ struct r300_state_atom { #define R300_FP_NODE3 8 #define R300_FP_CMDSIZE 9 +#define R500_FP_CMD_0 0 +#define R500_FP_CNTL 1 +#define R500_FP_PIXSIZE 2 +#define R500_FP_CMDSIZE 3 + #define R300_FPT_CMD_0 0 #define R300_FPT_INSTR_0 1 #define R300_FPT_CMDSIZE 65 diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index dbe1f6952e..b79b5e99f6 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2434,6 +2434,9 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) return; } + R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = fp->max_temp_idx; + R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... */ for (i = 0; i < fp->cs->nrslots; i++) { -- cgit v1.2.3 From 3816ae9ce835691e690d68f37ff6b01207068870 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 18:05:59 +1000 Subject: r500: make tri-param work This makes constant work which are 32-bit on r500 unlike r300. Switch MOV to using MAD no idea if we might have negative things MAX 0,-5 is likely to do the wrong thing.. --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 2 +- src/mesa/drivers/dri/r300/r300_state.c | 8 ++++---- src/mesa/drivers/dri/r300/r500_fragprog.c | 26 ++++++++++++++++---------- 3 files changed, 21 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 75f8910c3e..806e2755c5 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -54,7 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" // Set this to 1 for extremely verbose debugging of command buffers -#define DEBUG_CMDBUF 0 +#define DEBUG_CMDBUF 1 /** * Send the current command buffer via ioctl to the hardware. diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index b79b5e99f6..a083db9bbc 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2453,10 +2453,10 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(fp->constant[i][0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(fp->constant[i][1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(fp->constant[i][2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(fp->constant[i][3]); } bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index b08beb617f..5d4412bb1f 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -158,6 +158,8 @@ static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_registe * fragments don't get loaded right otherwise! */ reg = 0x0; break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: case PROGRAM_CONSTANT: reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters-> ParameterValues[src.Index]); @@ -440,8 +442,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_MOV: src[0] = make_src(fp, fpi->SrcReg[0]); - /* We use MAX, but MIN, CND, and CMP also work. - * Just remember to disable the OMOD! */ + + /* changed to use MAD - not sure if we + ever have negative things which max will fail on */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); @@ -449,14 +452,17 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | R500_ALU_RGB_SEL_B_SRC0 - | R500_ALU_RGB_R_SWIZ_B_R | R500_ALU_RGB_G_SWIZ_B_G | R500_ALU_RGB_B_SWIZ_B_B - | R500_ALU_RGB_OMOD_DISABLE; - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALU_RGB_R_SWIZ_B_1 | R500_ALU_RGB_G_SWIZ_B_1 | R500_ALU_RGB_B_SWIZ_B_1; + + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 - | R500_ALPHA_OMOD_DISABLE; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 + | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1; + + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_R_SWIZ_0 | R500_ALU_RGBA_G_SWIZ_0 + | R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -593,7 +599,7 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) fp->cur_node = 0; fp->first_node_has_tex = 0; fp->const_nr = 0; - fp->max_temp_idx = 0; + fp->max_temp_idx = 64; fp->node[0].alu_end = -1; fp->node[0].tex_end = -1; -- cgit v1.2.3 From 697680d687544c4495f05d5baa83659fb877477b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 18:15:40 +1000 Subject: r500: mov cleanup macros --- src/mesa/drivers/dri/r300/r500_fragprog.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 5d4412bb1f..ac6e306d20 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -76,6 +76,7 @@ #define R500_SWIZZLE_ONE 6 #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) +#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) /* Swizzles for inst2 */ #define MAKE_SWIZ_TEX_STRQ(x) (x << 8) #define MAKE_SWIZ_TEX_RGBA(x) (x << 24) @@ -450,10 +451,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B - | R500_ALU_RGB_SEL_B_SRC0 - | R500_ALU_RGB_R_SWIZ_B_1 | R500_ALU_RGB_G_SWIZ_B_1 | R500_ALU_RGB_B_SWIZ_B_1; - + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 @@ -461,8 +461,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_R_SWIZ_0 | R500_ALU_RGBA_G_SWIZ_0 - | R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From 66a49df9cba8f17059be420126346a4234e81cba Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 18:42:27 +1000 Subject: r500: consolidate tex instructions you cannot change a tex into an output so this means we have to actually do another instruction after this one to mov if its an output --- src/mesa/drivers/dri/r300/r500_fragprog.c | 68 ++++++++++++++++++------------- 1 file changed, 39 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index ac6e306d20..bbcbd2efd1 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -192,6 +192,43 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist return reg; } +static void emit_tex(struct r500_fragment_program *fp, + struct prog_instruction *fpi, int opcode, int dest, int counter) +{ + int hwsrc, hwdest; + GLuint mask; + + mask = fpi->DstReg.WriteMask << 11; + hwsrc = make_src(fp, fpi->SrcReg[0]); + + fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask + | R500_INST_TEX_SEM_WAIT; + + fp->inst[counter].inst1 = fpi->TexSrcUnit + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + switch (opcode) { + case OPCODE_TEX: + fp->inst[counter].inst1 |= R500_TEX_INST_LD; + break; + case OPCODE_TXP: + fp->inst[counter].inst1 |= R500_TEX_INST_PROJ; + } + + fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) + /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ + | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A + | R500_TEX_DST_ADDR(dest) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + + + fp->inst[counter].inst3 = 0x0; + fp->inst[counter].inst4 = 0x0; + fp->inst[counter].inst5 = 0x0; +} + static void dumb_shader(struct r500_fragment_program *fp) { fp->inst[0].inst0 = R500_INST_TYPE_TEX @@ -515,37 +552,10 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ALPHA_MOD_C_NEG; break; case OPCODE_TEX: - src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask - | R500_INST_TEX_SEM_WAIT; - fp->inst[counter].inst1 = fpi->TexSrcUnit - | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0]) - /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ - | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A - | R500_TEX_DST_ADDR(dest) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; + emit_tex(fp, fpi, OPCODE_TEX, dest, counter); break; case OPCODE_TXP: - src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask; - fp->inst[counter].inst1 = fpi->TexSrcUnit - | R500_TEX_INST_PROJ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0]) - /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ - | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A - | R500_TEX_DST_ADDR(dest) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; + emit_tex(fp, fpi, OPCODE_TXP, dest, counter); break; default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); -- cgit v1.2.3 From 06e2e1b87ce7db9f48b9d198d71d46636f7e6fe3 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 11:57:24 -0700 Subject: r5xx: Use max_temp_idx. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index bbcbd2efd1..0e2bda1c64 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -152,7 +152,10 @@ static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_registe GLuint reg; switch (src.File) { case PROGRAM_TEMPORARY: - reg = (src.Index << 0x1) | 0x1; + // reg = (src.Index << 0x1) | 0x1; + reg = src.Index; + if (src.Index > fp->max_temp_idx) + fp->max_temp_idx = src.Index; break; case PROGRAM_INPUT: /* Ugly hack needed to work around Mesa; @@ -177,7 +180,10 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist GLuint reg; switch (dest.File) { case PROGRAM_TEMPORARY: - reg = (dest.Index << 0x1) | 0x1; + // reg = (dest.Index << 0x1) | 0x1; + reg = dest.Index; + if (dest.Index > fp->max_temp_idx) + fp->max_temp_idx = src.Index; break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple @@ -354,9 +360,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); + | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0); fp->inst[counter].inst3 = /* 1 */ MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); @@ -586,8 +592,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) { fp->inst[counter].inst0 |= R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; + } else { + /* We still need to put an output inst, right? */ } + fp->max_temp_idx++; + return GL_TRUE; } -- cgit v1.2.3 From 171ba1d0d154f7fdeb712fd411f19e1ebddd3b55 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 12:18:07 -0700 Subject: r5xx: Fix typo. Gotta be more careful with my cut'n'paste, lawl. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 0e2bda1c64..9ad081e9b8 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -183,7 +183,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist // reg = (dest.Index << 0x1) | 0x1; reg = dest.Index; if (dest.Index > fp->max_temp_idx) - fp->max_temp_idx = src.Index; + fp->max_temp_idx = dest.Index; break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple -- cgit v1.2.3 From fa465fb2b1ce4119e4ae8f9b64721f385f361ad9 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 12:42:40 -0700 Subject: r5xx: We update max_temp_idx now, so no need to hard-code it. This roughly doubles the speed of glxgears (GINAB) by allowing more pixels to run concurrently. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 9ad081e9b8..b91cc273fd 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -619,7 +619,7 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) fp->cur_node = 0; fp->first_node_has_tex = 0; fp->const_nr = 0; - fp->max_temp_idx = 64; + fp->max_temp_idx = 0; fp->node[0].alu_end = -1; fp->node[0].tex_end = -1; -- cgit v1.2.3 From 1562dd2c26d43bffa8c6bd08ec6128c750ad58ff Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 12:44:53 -0700 Subject: r5xx: Emit an OUT instruction at the end of execution. This should make TEX/TXP work right. (Note: "Should" is not "does.") --- src/mesa/drivers/dri/r300/r500_fragprog.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index b91cc273fd..65fa805d81 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -585,17 +585,35 @@ static GLboolean parse_program(struct r500_fragment_program *fp) } - fp->cs->nrslots = counter; - - /* Finish him! (If it's an output instruction...) - * Yes, I know it's ugly... */ + /* Finish him! (If it's an ALU/OUT instruction...) */ if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) { fp->inst[counter].inst0 |= R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; + | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; } else { /* We still need to put an output inst, right? */ + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_OUT + | R500_INST_TEX_SEM_WAIT | R500_INST_LAST + | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G + | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK; + fp->inst[counter].inst1 = R500_RGB_ADDR0(dest); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(0) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 + | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(0) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); } + fp->cs->nrslots = counter; + fp->max_temp_idx++; return GL_TRUE; -- cgit v1.2.3 From 20baf128ef39dca058636c1bff4c526a8879b3d5 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 17:21:30 -0700 Subject: r5xx: FP: Make MOV/ABS look pretty. We can't really do anything like emit_alu, so we're doing emit_mov instead. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 71 ++++++++++++++----------------- 1 file changed, 32 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 65fa805d81..e6f7e173f7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -310,7 +310,30 @@ static void dumb_shader(struct r500_fragment_program *fp) fp->translated = GL_TRUE; } -static void emit_alu(struct r500_fragment_program *fp) { +/* static void emit_alu(struct r500_fragment_program *fp) { + * } */ + +static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) { + /* The r3xx shader uses MAD to implement MOV. We are using CMP, since + * it is technically more accurate and recommended by ATI/AMD. */ + GLuint src_reg = make_src(fp, src); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src)) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src)) + | R500_ALU_RGB_OMOD_DISABLE; + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src)) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src)) + | R500_ALPHA_OMOD_DISABLE; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); } static GLboolean parse_program(struct r500_fragment_program *fp) @@ -335,23 +358,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) switch (fpi->Opcode) { case OPCODE_ABS: - src[0] = make_src(fp, fpi->SrcReg[0]); - /* Variation on MOV */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 - | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_MOD_A_ABS - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); + emit_mov(fp, counter, fpi->SrcReg[0], dest); + fp->inst[counter].inst0 |= mask; + fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS + | R500_ALU_RGB_MOD_B_ABS; + fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS + | R500_ALPHA_MOD_B_ABS; break; case OPCODE_ADD: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -485,27 +497,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_MOV: - src[0] = make_src(fp, fpi->SrcReg[0]); - - /* changed to use MAD - not sure if we - ever have negative things which max will fail on */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 - | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1; - - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + emit_mov(fp, counter, fpi->SrcReg[0], dest); + fp->inst[counter].inst0 |= mask; break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From 40db59038cc62a5a8e4f94cb069eeb1d9b95a1a9 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 18:14:21 -0700 Subject: r5xx: FP: Add OPCODE_TXB. Tex lookup with biased LOD. Should magically work. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index e6f7e173f7..15bc5798e0 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -216,8 +216,14 @@ static void emit_tex(struct r500_fragment_program *fp, case OPCODE_TEX: fp->inst[counter].inst1 |= R500_TEX_INST_LD; break; + case OPCODE_TXB: + fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS; + break; case OPCODE_TXP: fp->inst[counter].inst1 |= R500_TEX_INST_PROJ; + break; + default: + ERROR("emit_tex can't handle opcode %x\n", opcode); } fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) @@ -553,6 +559,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_TEX: emit_tex(fp, fpi, OPCODE_TEX, dest, counter); break; + case OPCODE_TXB: + emit_tex(fp, fpi, OPCODE_TXB, dest, counter); + break; case OPCODE_TXP: emit_tex(fp, fpi, OPCODE_TXP, dest, counter); break; -- cgit v1.2.3 From dc24fb51a31de8443e653655105d4e1c88847bcc Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 22:18:28 -0700 Subject: r5xx: Index inputs and temps. This is not the same as r3xx indexing. It only tries to protect inputs on the pixel stack from getting clobbered by temps or texs. Texs don't need special treatment since they read from special input regs and write to the same temp regs as ALU/FC instructions. --- src/mesa/drivers/dri/r300/r300_context.h | 5 +- src/mesa/drivers/dri/r300/r500_fragprog.c | 131 +++++++++--------------------- 2 files changed, 41 insertions(+), 95 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 980a26ffdd..815a729969 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -807,10 +807,7 @@ struct r500_fragment_program { int cur_node; int first_node_has_tex; - int alu_offset; - int alu_end; - int tex_offset; - int tex_end; + int temp_reg_offset; /* Hardware constants. * Contains a pointer to the value. The destination of the pointer diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 15bc5798e0..c753c2b6f7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -65,6 +65,9 @@ #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs +#define R500_US_NUM_TEMP_REGS 128 +#define R500_US_NUM_CONST_REGS 256 + /* "Register" flags */ #define REG_CONSTANT (1 << 8) #define REG_SRC_REL (1 << 9) @@ -121,6 +124,30 @@ static inline GLuint make_strq_swizzle(struct prog_src_register src) { return swiz; } +static int get_temp(struct r500_fragment_program *fp, int slot) { + + COMPILE_STATE; + + int r = slot + fp->temp_reg_offset; + + while (cs->inputs[r].refcount != 0) { + /* Crap, taken. */ + r++; + } + + fp->temp_reg_offset = r - slot; + + if (r >= R500_US_NUM_TEMP_REGS) { + ERROR("Out of hardware temps!\n"); + return 0; + } + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + /* Borrowed verbatim from r300_fragprog since it hasn't changed. */ static GLuint emit_const4fv(struct r500_fragment_program *fp, const GLfloat * cp) @@ -134,8 +161,7 @@ static GLuint emit_const4fv(struct r500_fragment_program *fp, } if (index >= fp->const_nr) { - /* TODO: This should be r5xx nums, not r300 */ - if (index >= PFS_NUM_CONST_REGS) { + if (index >= R500_US_NUM_CONST_REGS) { ERROR("Out of hw constants!\n"); return reg; } @@ -152,15 +178,12 @@ static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_registe GLuint reg; switch (src.File) { case PROGRAM_TEMPORARY: - // reg = (src.Index << 0x1) | 0x1; - reg = src.Index; - if (src.Index > fp->max_temp_idx) - fp->max_temp_idx = src.Index; + reg = get_temp(fp, src.Index); break; case PROGRAM_INPUT: /* Ugly hack needed to work around Mesa; * fragments don't get loaded right otherwise! */ - reg = 0x0; + reg = src.Index; break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: @@ -180,10 +203,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist GLuint reg; switch (dest.File) { case PROGRAM_TEMPORARY: - // reg = (dest.Index << 0x1) | 0x1; - reg = dest.Index; - if (dest.Index > fp->max_temp_idx) - fp->max_temp_idx = dest.Index; + reg = get_temp(fp, dest.Index); break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple @@ -323,7 +343,7 @@ static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_ /* The r3xx shader uses MAD to implement MOV. We are using CMP, since * it is technically more accurate and recommended by ATI/AMD. */ GLuint src_reg = make_src(fp, src); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT; fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -511,7 +531,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: src0*src1+0 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -639,7 +659,10 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) fp->cur_node = 0; fp->first_node_has_tex = 0; fp->const_nr = 0; - fp->max_temp_idx = 0; + /* Size of pixel stack, plus 1. */ + fp->max_temp_idx = 1; + /* Temp register offset. */ + fp->temp_reg_offset = 0; fp->node[0].alu_end = -1; fp->node[0].tex_end = -1; @@ -659,49 +682,6 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) * starting from register 0. */ -#if 0 - /* Texcoords come first */ - for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; - cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - get_hw_temp(fp, 0); - } - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) { - cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); - insert_wpos(&mp->Base); - } - InputsRead &= ~FRAG_BIT_WPOS; - - /* Then primary colour */ - if (InputsRead & FRAG_BIT_COL0) { - cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); - } - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) { - cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); - } - InputsRead &= ~FRAG_BIT_COL1; - - /* Anything else */ - if (InputsRead) { - WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); - /* force read from hwreg 0 for now */ - for (i = 0; i < 32; i++) - if (InputsRead & (1 << i)) - cs->inputs[i].reg = 0; - } -#endif - /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. * That way, we can free up the reg when it's no longer needed */ @@ -712,35 +692,14 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { int idx; - for (i = 0; i < 3; i++) { idx = fpi->SrcReg[i].Index; - switch (fpi->SrcReg[i].File) { - case PROGRAM_TEMPORARY: - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - break; - case PROGRAM_INPUT: + if (fpi->SrcReg[i].File == PROGRAM_INPUT) { cs->inputs[idx].refcount++; - break; - default: - break; + if (fp->max_temp_idx < idx) + fp->max_temp_idx = idx; } } - - idx = fpi->DstReg.Index; - if (fpi->DstReg.File == PROGRAM_TEMPORARY) { - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - } } cs->temp_in_use = temps_used; } @@ -777,16 +736,6 @@ void r500TranslateFragmentShader(r300ContextPtr r300, return; } - /* Finish off */ - fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; - if (fp->node[fp->cur_node].tex_end < 0) - fp->node[fp->cur_node].tex_end = 0; - fp->alu_offset = 0; - fp->alu_end = cs->nrslots - 1; - //assert(fp->node[fp->cur_node].alu_end >= 0); - //assert(fp->alu_end >= 0); - fp->translated = GL_TRUE; r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); } -- cgit v1.2.3 From 49c30ce958e5e95e9e6ab79d2308751705d0ff22 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 23:36:50 -0700 Subject: r5xx: Fix false error with DP3/DP4. DP3/DP4 only takes two arguments, but tried to load three, causing a false fallback to the dumb shader. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index c753c2b6f7..f90f467cb7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -418,13 +418,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_DP3: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); + | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); @@ -433,23 +432,18 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3 - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_DP4: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); /* Based on DP3 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); + | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); @@ -458,11 +452,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_MAD: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From 1da094c9adf49c48a8b61ee7ab5336e8ba3f9e8d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 7 May 2008 00:06:26 -0700 Subject: r5xx: Fix FP inputs. (For good?) FP inputs are now counted and mapped correctly, and temps are allocated tightly and correctly. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 67 +++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f90f467cb7..ed14c93df7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -128,7 +128,7 @@ static int get_temp(struct r500_fragment_program *fp, int slot) { COMPILE_STATE; - int r = slot + fp->temp_reg_offset; + int r = slot; while (cs->inputs[r].refcount != 0) { /* Crap, taken. */ @@ -175,15 +175,14 @@ static GLuint emit_const4fv(struct r500_fragment_program *fp, } static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) { + COMPILE_STATE; GLuint reg; switch (src.File) { case PROGRAM_TEMPORARY: - reg = get_temp(fp, src.Index); + reg = src.Index + fp->temp_reg_offset; break; case PROGRAM_INPUT: - /* Ugly hack needed to work around Mesa; - * fragments don't get loaded right otherwise! */ - reg = src.Index; + reg = cs->inputs[src.Index].reg; break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: @@ -203,7 +202,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist GLuint reg; switch (dest.File) { case PROGRAM_TEMPORARY: - reg = get_temp(fp, dest.Index); + reg = dest.Index + fp->temp_reg_offset; break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple @@ -669,17 +668,65 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) * configures itself based on the fragprog's InputsRead * * NOTE: this depends on get_hw_temp() allocating registers in order, - * starting from register 0. + * starting from register 0, so we're just going to do that instead. */ + /* Texcoords come first */ + for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; + cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; + } + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) { + cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; + cs->inputs[FRAG_ATTRIB_WPOS].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; + } + InputsRead &= ~FRAG_BIT_WPOS; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) { + cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL0].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; + } + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) { + cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL1].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; + } + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i = 0; i < 32; i++) + if (InputsRead & (1 << i)) + cs->inputs[i].reg = 0; + } + /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. * That way, we can free up the reg when it's no longer needed */ if (!mp->Base.Instructions) { - ERROR("No instructions found in program\n"); + ERROR("No instructions found in program, going to go die now.\n"); return; } +#if 0 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { int idx; for (i = 0; i < 3; i++) { @@ -691,6 +738,10 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) } } } +#endif + + fp->max_temp_idx = fp->temp_reg_offset + 1; + cs->temp_in_use = temps_used; } -- cgit v1.2.3 From 53a7ccc08b286a02f5a276f213cfae31c8e6bf7c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 7 May 2008 15:16:27 +1000 Subject: r500: for rectangular textures set to unscaled coordinates. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index ed14c93df7..f9ef582d0a 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -231,6 +231,10 @@ static void emit_tex(struct r500_fragment_program *fp, fp->inst[counter].inst1 = fpi->TexSrcUnit | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) + fp->inst[counter].inst1 |= R500_TEX_UNSCALED; + switch (opcode) { case OPCODE_TEX: fp->inst[counter].inst1 |= R500_TEX_INST_LD; -- cgit v1.2.3 From 3d1528027889d67ca98002833dcb42b3f2f48067 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 7 May 2008 15:59:21 +1000 Subject: r500: cleanup r500 RS setup --- src/mesa/drivers/dri/r300/r300_reg.h | 33 ++++++-------- src/mesa/drivers/dri/r300/r300_state.c | 83 ++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index a6719d6553..c6d0d66c6f 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -721,23 +721,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_IP_13 0x40A8 #define R500_RS_IP_14 0x40AC #define R500_RS_IP_15 0x40B0 +#define R500_RS_IP_PTR_K0 62 +#define R500_RS_IP_PTR_K1 63 #define R500_RS_IP_TEX_PTR_S_SHIFT 0 #define R500_RS_IP_TEX_PTR_T_SHIFT 6 #define R500_RS_IP_TEX_PTR_R_SHIFT 12 #define R500_RS_IP_TEX_PTR_Q_SHIFT 18 #define R500_RS_IP_COL_PTR_SHIFT 24 #define R500_RS_IP_COL_FMT_SHIFT 27 -#define R500_RS_IP_COL_FMT_RGBA (0 << 27) -#define R500_RS_IP_COL_FMT_RGB0 (1 << 27) -#define R500_RS_IP_COL_FMT_RGB1 (2 << 27) -/* gap */ -#define R500_RS_IP_COL_FMT_000A (4 << 27) -#define R500_RS_IP_COL_FMT_0000 (5 << 27) -#define R500_RS_IP_COL_FMT_0001 (6 << 27) -/* gap */ -#define R500_RS_IP_COL_FMT_111A (8 << 27) -#define R500_RS_IP_COL_FMT_1110 (9 << 27) -#define R500_RS_IP_COL_FMT_1111 (10 << 27) +# define R500_RS_COL_PTR(x) (x << 24) +# define R500_RS_COL_FMT(x) (x << 27) /* gap */ #define R500_RS_IP_OFFSET_DIS (0 << 31) #define R500_RS_IP_OFFSET_EN (1 << 31) @@ -1177,15 +1170,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_TEX_PTR(x) (x << 0) # define R300_RS_COL_PTR(x) (x << 6) # define R300_RS_COL_FMT(x) (x << 9) -# define R300_RS_COL_FMT_RGBA 0 -# define R300_RS_COL_FMT_RGB0 2 -# define R300_RS_COL_FMT_RGB1 3 -# define R300_RS_COL_FMT_000A 4 -# define R300_RS_COL_FMT_0000 5 -# define R300_RS_COL_FMT_0001 6 -# define R300_RS_COL_FMT_111A 8 -# define R300_RS_COL_FMT_1110 9 -# define R300_RS_COL_FMT_1111 10 +# define R300_RS_COL_FMT_RGBA 0 +# define R300_RS_COL_FMT_RGB0 2 +# define R300_RS_COL_FMT_RGB1 3 +# define R300_RS_COL_FMT_000A 4 +# define R300_RS_COL_FMT_0000 5 +# define R300_RS_COL_FMT_0001 6 +# define R300_RS_COL_FMT_111A 8 +# define R300_RS_COL_FMT_1110 9 +# define R300_RS_COL_FMT_1111 10 # define R300_RS_SEL_S(x) (x << 13) # define R300_RS_SEL_T(x) (x << 16) # define R300_RS_SEL_R(x) (x << 19) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index a083db9bbc..298de096fb 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1501,7 +1501,7 @@ static void r300SetupRSUnit(GLcontext * ctx) int rs_tex_count = 0, rs_col_count = 0; int i, count; - memset(interp_col, 0, 8); + memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; @@ -1640,22 +1640,17 @@ static void r500SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); /* I'm still unsure if these are needed */ - GLuint interp_magic[8] = { - 0x00, - 1 << 24, - 2 << 24, - 3 << 24, - 0x00, - 0x00, - 0x00, - 0x00 - }; + GLuint interp_col[8]; union r300_outputs_written OutputsWritten; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; GLuint InputsRead; int fp_reg, high_rr; + int rs_tex_count = 0, rs_col_count = 0; int in_texcoords, col_interp_nr; - int i; + int i, count; + memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; else @@ -1672,7 +1667,7 @@ static void r500SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = in_texcoords = col_interp_nr = high_rr = 0; + fp_reg = col_interp_nr = high_rr = in_texcoords = 0; r300->hw.rr.cmd[R300_RR_INST_1] = 0; @@ -1690,15 +1685,51 @@ static void r500SetupRSUnit(GLcontext * ctx) InputsRead &= ~FRAG_BIT_WPOS; } + if (InputsRead & FRAG_BIT_COL0) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + interp_col[0] |= R500_RS_COL_PTR(rs_col_count); + if (count == 3) + interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + else + interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + + if (InputsRead & FRAG_BIT_COL1) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + interp_col[1] |= R500_RS_COL_PTR(1); + if (count == 3) + interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - - // r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) | - (in_texcoords << 0) | interp_magic[i]; + GLuint swiz; + + /* with TCL we always seem to route 4 components */ + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + swiz = 0; + if (count == 4) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT; + else + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + + if (count >= 3) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT; + else + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + + /* always have a least 2 tex coords */ + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; + } + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count | swiz; r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { @@ -1715,16 +1746,11 @@ static void r500SetupRSUnit(GLcontext * ctx) WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); } } - /* Need to count all coords enabled at vof */ - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { - in_texcoords++; - } } if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - // r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL0; col_interp_nr++; } else { @@ -1734,7 +1760,6 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) @@ -1751,7 +1776,7 @@ static void r500SetupRSUnit(GLcontext * ctx) col_interp_nr++; } - r300->hw.rc.cmd[1] = 0 | ((in_texcoords << 2) << R300_IT_COUNT_SHIFT) + r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_interp_nr << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; -- cgit v1.2.3