From 9b781ca2ce9cdf6c21eb5dda4709366b2581d17a Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Mon, 29 Jun 2009 21:39:59 +0200 Subject: r300: simplify insert_wpos a little --- src/mesa/drivers/dri/r300/r300_vertprog.c | 76 +++++++++++++------------------ 1 file changed, 31 insertions(+), 45 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index c41a8fdd62..1d198cde42 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1283,52 +1283,38 @@ static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, GLuint temp_index) { struct prog_instruction *vpi; - struct prog_instruction *vpi_insert; - int i = 0; - - vpi = _mesa_alloc_instructions(prog->NumInstructions + 2); - _mesa_init_instructions(vpi, prog->NumInstructions + 2); - /* all but END */ - _mesa_copy_instructions(vpi, prog->Instructions, - prog->NumInstructions - 1); - /* END */ - _mesa_copy_instructions(&vpi[prog->NumInstructions + 1], - &prog->Instructions[prog->NumInstructions - 1], - 1); - vpi_insert = &vpi[prog->NumInstructions - 1]; - - vpi_insert[i].Opcode = OPCODE_MOV; - - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; - - vpi_insert[i].Opcode = OPCODE_MOV; - - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; - free(prog->Instructions); + _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); - prog->Instructions = vpi; + vpi = &prog->Instructions[prog->NumInstructions - 3]; - prog->NumInstructions += i; - vpi = &prog->Instructions[prog->NumInstructions - 1]; + vpi->Opcode = OPCODE_MOV; - assert(vpi->Opcode == OPCODE_END); + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_HPOS; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_END; } static void pos_as_texcoord(struct r300_vertex_program *vp, @@ -1336,16 +1322,16 @@ static void pos_as_texcoord(struct r300_vertex_program *vp, { struct prog_instruction *vpi; GLuint tempregi = prog->NumTemporaries; - /* should do something else if no temps left... */ + prog->NumTemporaries++; for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT - && vpi->DstReg.Index == VERT_RESULT_HPOS) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { vpi->DstReg.File = PROGRAM_TEMPORARY; vpi->DstReg.Index = tempregi; } } + insert_wpos(vp, prog, tempregi); } -- cgit v1.2.3 From 4a6899e080bbdb556926031345ef6a9fbe6d9ff2 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Mon, 29 Jun 2009 21:48:35 +0200 Subject: r300: use mesa provided function for adding MVP code --- src/mesa/drivers/dri/r300/r300_context.c | 2 - src/mesa/drivers/dri/r300/r300_vertprog.c | 100 +++--------------------------- 2 files changed, 7 insertions(+), 95 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index b7d75426c5..6f3aab986d 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -418,8 +418,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); - _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); - /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 1d198cde42..5da9bda281 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -32,6 +32,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/macros.h" #include "main/enums.h" #include "shader/program.h" +#include "shader/programopt.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" #include "shader/prog_print.h" @@ -1191,94 +1192,6 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, } } -/* DP4 version seems to trigger some hw peculiarity */ -//#define PREFER_DP4 - -static void position_invariant(struct gl_program *prog) -{ - struct prog_instruction *vpi; - struct gl_program_parameter_list *paramList; - int i; - - gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 }; - - /* tokens[4] = matrix modifier */ -#ifdef PREFER_DP4 - tokens[4] = 0; /* not transposed or inverted */ -#else - tokens[4] = STATE_MATRIX_TRANSPOSE; -#endif - paramList = prog->Parameters; - - vpi = _mesa_alloc_instructions(prog->NumInstructions + 4); - _mesa_init_instructions(vpi, prog->NumInstructions + 4); - - for (i = 0; i < 4; i++) { - GLint idx; - tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */ - idx = _mesa_add_state_reference(paramList, tokens); -#ifdef PREFER_DP4 - vpi[i].Opcode = OPCODE_DP4; - vpi[i].StringPos = 0; - vpi[i].Data = 0; - - vpi[i].DstReg.File = PROGRAM_OUTPUT; - vpi[i].DstReg.Index = VERT_RESULT_HPOS; - vpi[i].DstReg.WriteMask = 1 << i; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; -#else - if (i == 0) - vpi[i].Opcode = OPCODE_MUL; - else - vpi[i].Opcode = OPCODE_MAD; - - vpi[i].Data = 0; - - if (i == 3) - vpi[i].DstReg.File = PROGRAM_OUTPUT; - else - vpi[i].DstReg.File = PROGRAM_TEMPORARY; - vpi[i].DstReg.Index = 0; - vpi[i].DstReg.WriteMask = 0xf; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); - - if (i > 0) { - vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; - vpi[i].SrcReg[2].Index = 0; - vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; - } -#endif - } - - _mesa_copy_instructions(&vpi[i], prog->Instructions, - prog->NumInstructions); - - free(prog->Instructions); - - prog->Instructions = vpi; - - prog->NumInstructions += 4; - vpi = &prog->Instructions[prog->NumInstructions - 1]; - - assert(vpi->Opcode == OPCODE_END); -} - static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, GLuint temp_index) { @@ -1335,9 +1248,10 @@ static void pos_as_texcoord(struct r300_vertex_program *vp, insert_wpos(vp, prog, tempregi); } -static struct r300_vertex_program *build_program(struct r300_vertex_program_key - *wanted_key, struct gl_vertex_program - *mesa_vp, GLint wpos_idx) +static struct r300_vertex_program *build_program(GLcontext *ctx, + struct r300_vertex_program_key *wanted_key, + struct gl_vertex_program *mesa_vp, + GLint wpos_idx) { struct r300_vertex_program *vp; @@ -1346,7 +1260,7 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key vp->wpos_idx = wpos_idx; if (mesa_vp->IsPositionInvariant) { - position_invariant(&mesa_vp->Base); + _mesa_insert_mvp_code(ctx, mesa_vp); } if (wpos_idx > -1) { @@ -1480,7 +1394,7 @@ void r300SelectVertexShader(r300ContextPtr r300) fflush(stdout); } - vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); + vp = build_program(ctx, &wanted_key, &vpc->mesa_program, wpos_idx); vp->next = vpc->progs; vpc->progs = vp; r300->selected_vp = vp; -- cgit v1.2.3 From 0b411a72f3cc3be7ecf9f4676d9860b2b56f084e Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Mon, 29 Jun 2009 21:52:39 +0200 Subject: r300: print vertex program after adding artificial output insts --- src/mesa/drivers/dri/r300/r300_vertprog.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 5da9bda281..a0babeff92 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1259,6 +1259,12 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); vp->wpos_idx = wpos_idx; + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(&mesa_vp->Base); + fflush(stdout); + } + if (mesa_vp->IsPositionInvariant) { _mesa_insert_mvp_code(ctx, mesa_vp); } @@ -1267,12 +1273,6 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, pos_as_texcoord(vp, &mesa_vp->Base); } - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(&mesa_vp->Base); - fflush(stdout); - } - /* Some outputs may be artificially added, to match the inputs of the fragment program. * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by * vertex program are undefined, so just use MOV [vertex_result], CONST[0] @@ -1310,6 +1310,12 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, } } + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + _mesa_print_program(&mesa_vp->Base); + fflush(stdout); + } + assert(mesa_vp->Base.NumInstructions); vp->num_temporaries = mesa_vp->Base.NumTemporaries; r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); @@ -1388,12 +1394,6 @@ void r300SelectVertexShader(r300ContextPtr r300) return; } - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Initial vertex program:\n"); - _mesa_print_program(&vpc->mesa_program.Base); - fflush(stdout); - } - vp = build_program(ctx, &wanted_key, &vpc->mesa_program, wpos_idx); vp->next = vpc->progs; vpc->progs = vp; -- cgit v1.2.3 From 21db37d43245f97032fa21030279c3b47c901639 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Wed, 1 Jul 2009 18:43:14 +0200 Subject: r300: translate non native insts earlier for easier debugging --- src/mesa/drivers/dri/r300/r300_vertprog.c | 202 ++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index a0babeff92..49fcb6ff2c 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -72,6 +72,33 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ } while (0) +static GLuint combineSwizzles(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) +{ + GLuint ret = 0; + + if (swz_x == SWIZZLE_ZERO || swz_x == SWIZZLE_ONE) + ret |= swz_x; + else + ret |= GET_SWZ(src, swz_x); + + if (swz_y == SWIZZLE_ZERO || swz_y == SWIZZLE_ONE) + ret |= swz_y << 3; + else + ret |= GET_SWZ(src, swz_y) << 3; + + if (swz_z == SWIZZLE_ZERO || swz_z == SWIZZLE_ONE) + ret |= swz_z << 6; + else + ret |= GET_SWZ(src, swz_z) << 6; + + if (swz_w == SWIZZLE_ZERO || swz_w == SWIZZLE_ONE) + ret |= swz_w << 9; + else + ret |= GET_SWZ(src, swz_w) << 9; + + return ret; +} + int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program_cont *vp, float *dst) { @@ -1248,6 +1275,179 @@ static void pos_as_texcoord(struct r300_vertex_program *vp, insert_wpos(vp, prog, tempregi); } +static int translateABS(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_MAX; + inst->SrcReg[1] = inst->SrcReg[0]; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; +} + +static int translateDP3(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combineSwizzles(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + return 0; +} + +static int translateDPH(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combineSwizzles(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + + return 0; +} + +static int translateFLR(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + struct prog_dst_register dst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + dst = inst->DstReg; + + inst->Opcode = OPCODE_FRC; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + ++inst; + + inst->Opcode = OPCODE_ADD; + inst->DstReg = dst; + inst->SrcReg[0] = (inst-1)->SrcReg[0]; + inst->SrcReg[1].File = PROGRAM_TEMPORARY; + inst->SrcReg[1].Index = tmp_idx; + inst->SrcReg[1].Negate = NEGATE_XYZW; + + return 1; +} + +static int translateSUB(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_ADD; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; +} + +static int translateSWZ(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + GLuint orig_negate, orig_writemask; + + inst = &prog->Instructions[pos]; + orig_negate = inst->SrcReg[0].Negate; + orig_writemask = inst->DstReg.WriteMask; + + inst->Opcode = OPCODE_MOV; + + /* If all relevant components are either negated or not negated at the same time, we are ok. + */ + if ((orig_negate & orig_writemask) == 0 || (orig_negate & orig_writemask) == (NEGATE_XYZW & orig_writemask)) + return 0; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + inst->DstReg.WriteMask = orig_writemask & (orig_negate ^ NEGATE_XYZW); + inst->SrcReg[0].Negate = NEGATE_NONE; + ++inst; + + *inst = *(inst-1); + inst->DstReg.WriteMask = orig_writemask & orig_negate; + inst->SrcReg[0].Negate = NEGATE_XYZW; + + return 1; +} + +static int translateXPD(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + + *(inst+1) = *inst; + + inst->Opcode = OPCODE_MUL; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + inst->SrcReg[0].Swizzle = combineSwizzles(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combineSwizzles(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + ++inst; + + inst->Opcode = OPCODE_MAD; + inst->SrcReg[0].Swizzle = combineSwizzles(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combineSwizzles(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + inst->SrcReg[2].File = PROGRAM_TEMPORARY; + inst->SrcReg[2].Index = tmp_idx; + + return 1; +} + +static void translateInsts(struct gl_program *prog) +{ + struct prog_instruction *inst; + int i; + + for (i = 0; i < prog->NumInstructions; ++i) { + inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ABS: + i += translateABS(prog, i); + break; + case OPCODE_DP3: + i += translateDP3(prog, i); + break; + case OPCODE_DPH: + i += translateDPH(prog, i); + break; + case OPCODE_FLR: + i += translateFLR(prog, i); + break; + case OPCODE_SUB: + i += translateSUB(prog, i); + break; + case OPCODE_SWZ: + i += translateSWZ(prog, i); + break; + case OPCODE_XPD: + i += translateXPD(prog, i); + break; + default: + break; + } + } +} + static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program_key *wanted_key, struct gl_vertex_program *mesa_vp, @@ -1310,6 +1510,8 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, } } + translateInsts(&mesa_vp->Base); + if (RADEON_DEBUG & DEBUG_VERTS) { fprintf(stderr, "Vertex program after native rewrite:\n"); _mesa_print_program(&mesa_vp->Base); -- cgit v1.2.3 From 28066ed012b51f2171866669a2972bc7ee293565 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 4 Jul 2009 12:56:31 +0200 Subject: r300: update state parameters only once per rendering operation --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 11 ----------- src/mesa/drivers/dri/r300/r300_state.c | 6 +++--- src/mesa/drivers/dri/r300/r300_state.h | 1 - 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index abc8757ba1..1644e6f42a 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -50,13 +50,6 @@ #include "radeon_program.h" #include "radeon_program_alu.h" -static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) -{ - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (fp->Base.Parameters) - _mesa_load_state_parameters(ctx, fp->Base.Parameters); -} - static void nqssadce_init(struct nqssadce_state* s) { s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; @@ -281,11 +274,7 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) r300_fp->translated = GL_TRUE; - r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); - if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) r300->vtbl.FragmentProgramDump(&r300_fp->code); } - - update_params(ctx, fp); } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index c0eda977db..67bd6ec33b 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1093,7 +1093,7 @@ r300FetchStateParameter(GLcontext * ctx, * Update R300's own internal state parameters. * For now just STATE_R300_WINDOW_DIMENSION */ -void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) +static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) { struct r300_fragment_program *fp; struct gl_program_parameter_list *paramList; @@ -1111,6 +1111,8 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) if (!paramList) return; + _mesa_load_state_parameters(ctx, paramList); + for (i = 0; i < paramList->NumParameters; i++) { if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { r300FetchStateParameter(ctx, @@ -2323,8 +2325,6 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) R300_STATECHANGE(r300, cb); } - r300UpdateStateParameters(ctx, new_state); - r300->radeon.NewGLState |= new_state; } diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index 2328289420..d46bf9f179 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -52,7 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. void r300UpdateViewportOffset (GLcontext * ctx); void r300UpdateDrawBuffer (GLcontext * ctx); -void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state); void r300UpdateShaders (r300ContextPtr rmesa); void r300UpdateShaderStates (r300ContextPtr rmesa); void r300InitState (r300ContextPtr r300); -- cgit v1.2.3 From 7829b7a1b85dd8e6c31189e7f3dec91d71d134c3 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 4 Jul 2009 13:26:49 +0200 Subject: r300: cache translated fragment programs --- src/mesa/drivers/dri/r300/r300_context.h | 10 +- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 169 ++++++++++++----------- src/mesa/drivers/dri/r300/r300_fragprog_common.h | 6 +- src/mesa/drivers/dri/r300/r300_shader.c | 51 ++++++- src/mesa/drivers/dri/r300/r300_state.c | 59 ++++---- src/mesa/drivers/dri/r300/r300_swtcl.c | 2 +- 6 files changed, 173 insertions(+), 124 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 026c33c67c..fc436e1100 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -546,7 +546,7 @@ struct r500_fragment_program_code { * to render with that program. */ struct r300_fragment_program { - struct gl_fragment_program Base; + struct gl_program *Base; GLboolean translated; GLboolean error; @@ -559,6 +559,13 @@ struct r300_fragment_program { GLboolean writes_depth; GLuint optimization; + + struct r300_fragment_program *next; +}; + +struct r300_fragment_program_cont { + struct gl_fragment_program Base; + struct r300_fragment_program *progs; }; struct r300_fragment_program_compiler { @@ -633,6 +640,7 @@ struct r300_context { struct r300_hw_state hw; struct r300_vertex_program *selected_vp; + struct r300_fragment_program *selected_fp; /* Vertex buffers */ diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 1644e6f42a..b25cf24007 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -67,7 +67,7 @@ static void nqssadce_init(struct nqssadce_state* s) */ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) { - GLuint InputsRead = compiler->fp->Base.Base.InputsRead; + GLuint InputsRead = compiler->fp->Base->InputsRead; if (!(InputsRead & FRAG_BIT_WPOS)) return; @@ -168,7 +168,7 @@ static GLuint build_func(GLuint comparefunc) */ static void build_state( r300ContextPtr r300, - struct r300_fragment_program *fp, + struct gl_fragment_program *fp, struct r300_fragment_program_external_state *state) { int unit; @@ -176,7 +176,7 @@ static void build_state( _mesa_bzero(state, sizeof(*state)); for(unit = 0; unit < 16; ++unit) { - if (fp->Base.Base.ShadowSamplers & (1 << unit)) { + if (fp->Base.ShadowSamplers & (1 << unit)) { struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); @@ -185,96 +185,107 @@ static void build_state( } } -void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) +void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; - struct r300_fragment_program_external_state state; + struct r300_fragment_program_compiler compiler; + + compiler.r300 = r300; + compiler.fp = fp; + compiler.code = &fp->code; + compiler.program = fp->Base; + + if (RADEON_DEBUG & DEBUG_PIXEL) { + fflush(stdout); + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } - build_state(r300, r300_fp, &state); - if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { - /* TODO: cache compiled programs */ - r300_fp->translated = GL_FALSE; - _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); + insert_WPOS_trailer(&compiler); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 3, transformations); } - if (!r300_fp->translated) { - struct r300_fragment_program_compiler compiler; + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } - compiler.r300 = r300; - compiler.fp = r300_fp; - compiler.code = &r300_fp->code; - compiler.program = _mesa_clone_program(ctx, &fp->Base); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } - if (RADEON_DEBUG & DEBUG_PIXEL) { - fflush(stdout); - _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } - insert_WPOS_trailer(&compiler); - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_program_transformation transformations[] = { - { &r500_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 4, transformations); - } else { - struct radeon_program_transformation transformations[] = { - { &r300_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 3, transformations); - } + if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) + fp->error = GL_TRUE; - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } + fp->translated = GL_TRUE; - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } + if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300->vtbl.FragmentProgramDump(&fp->code); +} - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } +struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program_cont *fp_list; + struct r300_fragment_program *fp; + struct r300_fragment_program_external_state state; + + fp_list = (struct r300_fragment_program_cont *)ctx->FragmentProgram._Current; + build_state(r300, ctx->FragmentProgram._Current, &state); - if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) - r300_fp->error = GL_TRUE; + fp = fp_list->progs; + while (fp) { + if (_mesa_memcmp(&fp->state, &state, sizeof(state)) == 0) { + return r300->selected_fp = fp; + } + fp = fp->next; + } - /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->Base.Parameters); - fp->Base.Parameters = compiler.program->Parameters; - compiler.program->Parameters = 0; + fp = _mesa_calloc(sizeof(struct r300_fragment_program)); - _mesa_reference_program(ctx, &compiler.program, NULL); + fp->state = state; + fp->translated = GL_FALSE; + fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base); - r300_fp->translated = GL_TRUE; + fp->next = fp_list->progs; + fp_list->progs = fp; - if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300->vtbl.FragmentProgramDump(&r300_fp->code); - } + return r300->selected_fp = fp; } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h index 85ea86fecb..5e103ee408 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h @@ -30,6 +30,10 @@ #include "main/mtypes.h" -extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); +#include "r300_context.h" + +extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp); + +struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 0133b83796..c9a429943a 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -32,11 +32,34 @@ #include "r300_context.h" #include "r300_fragprog_common.h" +static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont *cache) +{ + struct r300_fragment_program *tmp, *fp = cache->progs; + + while (fp) { + tmp = fp->next; + _mesa_reference_program(ctx, &fp->Base, NULL); + _mesa_free(fp); + fp = tmp; + } +} + +static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *cache) +{ + struct r300_vertex_program *tmp, *vp = cache->progs; + + while (vp) { + tmp = vp->next; + _mesa_free(vp); + vp = tmp; + } +} + static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, GLuint id) { struct r300_vertex_program_cont *vp; - struct r300_fragment_program *fp; + struct r300_fragment_program_cont *fp; switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: @@ -47,7 +70,7 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: - fp = CALLOC_STRUCT(r300_fragment_program); + fp = CALLOC_STRUCT(r300_fragment_program_cont); return _mesa_init_fragment_program(ctx, &fp->Base, target, id); default: @@ -59,21 +82,35 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) { + struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog; + struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog; + + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: + freeVertProgCache(ctx, vp); + break; + case GL_FRAGMENT_PROGRAM_ARB: + freeFragProgCache(ctx, fp); + break; + } + _mesa_delete_program(ctx, prog); } static void r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { - struct r300_vertex_program_cont *vp = (void *)prog; - struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; + struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog; + struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog; switch (target) { case GL_VERTEX_PROGRAM_ARB: + freeVertProgCache(ctx, vp); vp->progs = NULL; break; case GL_FRAGMENT_PROGRAM_ARB: - r300_fp->translated = GL_FALSE; + freeFragProgCache(ctx, fp); + fp->progs = NULL; break; } @@ -85,9 +122,9 @@ static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { - struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; + struct r300_fragment_program *fp = r300SelectFragmentShader(ctx); if (!fp->translated) - r300TranslateFragmentShader(ctx, &fp->Base); + r300TranslateFragmentShader(ctx, fp); return !fp->error; } else diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 67bd6ec33b..e0996383eb 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -452,9 +452,9 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + r300ContextPtr r300 = R300_CONTEXT(ctx); - return (fp && fp->writes_depth); + return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth; } static void r300SetEarlyZState(GLcontext * ctx) @@ -1095,18 +1095,17 @@ r300FetchStateParameter(GLcontext * ctx, */ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) { - struct r300_fragment_program *fp; + r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_program_parameter_list *paramList; GLuint i; if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; - fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; - if (!fp) + if (!ctx->FragmentProgram._Current || !rmesa->selected_fp) return; - paramList = fp->Base.Base.Parameters; + paramList = rmesa->selected_fp->Base->Parameters; if (!paramList) return; @@ -1227,8 +1226,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - struct r300_fragment_program_code *code = &fp->code.r300; + struct r300_fragment_program_code *code = &r300->selected_fp->code.r300; R300_STATECHANGE(r300, fpt); @@ -1268,9 +1266,9 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { + r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - struct r500_fragment_program_code *code = &fp->code.r500; + struct r500_fragment_program_code *code = &r300->selected_fp->code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -1318,8 +1316,6 @@ static void r300SetupTextures(GLcontext * ctx) int hw_tmu = 0; int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */ int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, }; - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; R300_STATECHANGE(r300, txe); R300_STATECHANGE(r300, tex.filter); @@ -1422,7 +1418,7 @@ static void r300SetupTextures(GLcontext * ctx) cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - if (fp->Base.UsesKill && last_hw_tmu < 0) { + if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { // The KILL operation requires the first texture unit // to be enabled. r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; @@ -1464,7 +1460,7 @@ static void r300SetupRSUnit(GLcontext * ctx) else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + InputsRead = r300->selected_fp->Base->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1581,7 +1577,7 @@ static void r500SetupRSUnit(GLcontext * ctx) else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + InputsRead = r300->selected_fp->Base->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -2038,21 +2034,18 @@ static void r300ResetHwState(r300ContextPtr r300) void r300UpdateShaders(r300ContextPtr rmesa) { - GLcontext *ctx; + GLcontext *ctx = rmesa->radeon.glCtx; struct r300_fragment_program *fp; - int i; - - ctx = rmesa->radeon.glCtx; - fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; /* should only happenen once, just after context is created */ /* TODO: shouldn't we fallback to sw here? */ - if (!fp) { + if (!ctx->FragmentProgram._Current) { _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); return; } if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) { + int i; for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { rmesa->temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; @@ -2071,8 +2064,9 @@ void r300UpdateShaders(r300ContextPtr rmesa) r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, rmesa->selected_vp->error); } - if (!fp->translated || rmesa->radeon.NewGLState) - r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); + fp = r300SelectFragmentShader(ctx); + if (!fp->translated) + r300TranslateFragmentShader(ctx, fp); r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); @@ -2104,7 +2098,7 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, static void r300SetupPixelShader(GLcontext *ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r300_fragment_program *fp = rmesa->selected_fp; struct r300_fragment_program_code *code; int i, k; @@ -2150,8 +2144,7 @@ static void r300SetupPixelShader(GLcontext *ctx) R300_STATECHANGE(rmesa, fpp); rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->Base.Base, code->constant[i]); + const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); @@ -2176,7 +2169,7 @@ static void r300SetupPixelShader(GLcontext *ctx) static void r500SetupPixelShader(GLcontext *ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r300_fragment_program *fp = rmesa->selected_fp; int i; struct r500_fragment_program_code *code; @@ -2212,8 +2205,7 @@ static void r500SetupPixelShader(GLcontext *ctx) R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->Base.Base, code->constant[i]); + const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); @@ -2276,20 +2268,17 @@ void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten, rmesa->selected_fp->Base->InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten, rmesa->selected_fp->Base->InputsRead); } void r300UpdateShaderStates(r300ContextPtr rmesa) { GLcontext *ctx; ctx = rmesa->radeon.glCtx; - struct r300_fragment_program *r300_fp; - - r300_fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; /* should only happenen once, just after context is created */ - if (!r300_fp) + if (!ctx->FragmentProgram._Current) return; r300SetEarlyZState(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index ce4179208e..db4ccce6f1 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -76,7 +76,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ GLuint InputsRead = 0; GLuint OutputsWritten = 0; int num_attrs = 0; - GLuint fp_reads = ctx->FragmentProgram._Current->Base.InputsRead; + GLuint fp_reads = rmesa->selected_fp->Base->InputsRead; struct vertex_attribute *attrs = rmesa->vbuf.attribs; rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; -- cgit v1.2.3 From bce224c1f108e6c8131dfc953ef607689b83ae7e Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Fri, 3 Jul 2009 20:06:23 +0200 Subject: r300: don't modify original vertex program Keep the original vertex program untouched because it may be needed after some state change for generating new r300 specific vertex program. --- src/mesa/drivers/dri/r300/r300_context.h | 4 +-- src/mesa/drivers/dri/r300/r300_shader.c | 1 + src/mesa/drivers/dri/r300/r300_vertprog.c | 51 ++++++++++++++++--------------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index fc436e1100..30b6e75039 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -405,6 +405,7 @@ struct r300_hw_state { #undef TAG struct r300_vertex_program { + struct gl_vertex_program *Base; struct r300_vertex_program *next; struct r300_vertex_program_key { @@ -674,9 +675,6 @@ extern GLboolean r300CreateContext(const __GLcontextModes * glVisual, extern void r300SelectVertexShader(r300ContextPtr r300); extern void r300InitShaderFuncs(struct dd_function_table *functions); -extern int r300VertexProgUpdateParams(GLcontext * ctx, - struct r300_vertex_program_cont *vp, - float *dst); extern void r300InitShaderFunctions(r300ContextPtr r300); diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index c9a429943a..7206379ea0 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -50,6 +50,7 @@ static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *c while (vp) { tmp = vp->next; + _mesa_reference_vertprog(ctx, &vp->Base, NULL); _mesa_free(vp); vp = tmp; } diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 49fcb6ff2c..25c42814ec 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -99,15 +99,13 @@ static GLuint combineSwizzles(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz return ret; } -int r300VertexProgUpdateParams(GLcontext * ctx, - struct r300_vertex_program_cont *vp, float *dst) +static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) { int pi; - struct gl_vertex_program *mesa_vp = &vp->mesa_program; float *dst_o = dst; struct gl_program_parameter_list *paramList; - if (mesa_vp->IsNVProgram) { + if (vp->IsNVProgram) { _mesa_load_tracked_matrices(ctx); for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { @@ -119,16 +117,18 @@ int r300VertexProgUpdateParams(GLcontext * ctx, return dst - dst_o; } - assert(mesa_vp->Base.Parameters); - _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); + if (!vp->Base.Parameters) + return 0; + + _mesa_load_state_parameters(ctx, vp->Base.Parameters); - if (mesa_vp->Base.Parameters->NumParameters * 4 > + if (vp->Base.Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH) { fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); _mesa_exit(-1); } - paramList = mesa_vp->Base.Parameters; + paramList = vp->Base.Parameters; for (pi = 0; pi < paramList->NumParameters; pi++) { switch (paramList->Parameters[pi].Type) { case PROGRAM_STATE_VAR: @@ -1027,9 +1027,9 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) } } -static void r300TranslateVertexShader(struct r300_vertex_program *vp, - struct prog_instruction *vpi) +static void r300TranslateVertexShader(struct r300_vertex_program *vp) { + struct prog_instruction *vpi = vp->Base->Base.Instructions; int i; GLuint *inst; unsigned long num_operands; @@ -1450,27 +1450,31 @@ static void translateInsts(struct gl_program *prog) static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program_key *wanted_key, - struct gl_vertex_program *mesa_vp, + const struct gl_vertex_program *mesa_vp, GLint wpos_idx) { struct r300_vertex_program *vp; + struct gl_program *prog; vp = _mesa_calloc(sizeof(*vp)); + vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); vp->wpos_idx = wpos_idx; + prog = &vp->Base->Base; + if (RADEON_DEBUG & DEBUG_VERTS) { fprintf(stderr, "Initial vertex program:\n"); - _mesa_print_program(&mesa_vp->Base); + _mesa_print_program(prog); fflush(stdout); } if (mesa_vp->IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, mesa_vp); + _mesa_insert_mvp_code(ctx, vp->Base); } if (wpos_idx > -1) { - pos_as_texcoord(vp, &mesa_vp->Base); + pos_as_texcoord(vp, prog); } /* Some outputs may be artificially added, to match the inputs of the fragment program. @@ -1488,8 +1492,8 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, if (count > 0) { struct prog_instruction *inst; - _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count); - inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count]; + _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); + inst = &prog->Instructions[prog->NumInstructions - 1 - count]; for (i = 0; i < VERT_RESULT_MAX; ++i) { if (vp->key.OutputsAdded & (1 << i)) { @@ -1510,17 +1514,17 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, } } - translateInsts(&mesa_vp->Base); + translateInsts(prog); if (RADEON_DEBUG & DEBUG_VERTS) { fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(&mesa_vp->Base); + _mesa_print_program(prog); fflush(stdout); } - assert(mesa_vp->Base.NumInstructions); - vp->num_temporaries = mesa_vp->Base.NumTemporaries; - r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); + assert(prog->NumInstructions); + vp->num_temporaries = prog->NumTemporaries; + r300TranslateVertexShader(vp); return vp; } @@ -1653,10 +1657,7 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; R300_STATECHANGE(rmesa, vpp); - param_count = r300VertexProgUpdateParams(ctx, - (struct r300_vertex_program_cont *) - ctx->VertexProgram._Current, - (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; -- cgit v1.2.3 From 37c319f62f59d7750dd172034e43dfd489f572cc Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Fri, 3 Jul 2009 20:14:24 +0200 Subject: r300: implement proper IsProgramNative check for vertex programs --- src/mesa/drivers/dri/r300/r300_context.h | 1 - src/mesa/drivers/dri/r300/r300_shader.c | 9 +++++++-- src/mesa/drivers/dri/r300/r300_state.c | 8 ++++++-- src/mesa/drivers/dri/r300/r300_vertprog.c | 13 ++++++------- src/mesa/drivers/dri/r300/r300_vertprog.h | 4 ++++ 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 30b6e75039..44211a45b3 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -673,7 +673,6 @@ extern GLboolean r300CreateContext(const __GLcontextModes * glVisual, __DRIcontextPrivate * driContextPriv, void *sharedContextPrivate); -extern void r300SelectVertexShader(r300ContextPtr r300); extern void r300InitShaderFuncs(struct dd_function_table *functions); extern void r300InitShaderFunctions(r300ContextPtr r300); diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 7206379ea0..854eb5d80a 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -128,8 +128,13 @@ r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) r300TranslateFragmentShader(ctx, fp); return !fp->error; - } else - return GL_TRUE; + } else { + struct r300_vertex_program *vp = r300SelectVertexShader(ctx); + if (!vp->translated) + r300TranslateVertexShader(vp); + + return !vp->error; + } } void r300InitShaderFuncs(struct dd_function_table *functions) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e0996383eb..8c228ab5e7 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2045,6 +2045,7 @@ void r300UpdateShaders(r300ContextPtr rmesa) } if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) { + struct r300_vertex_program *vp; int i; for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { rmesa->temp_attrib[i] = @@ -2060,8 +2061,11 @@ void r300UpdateShaders(r300ContextPtr rmesa) rmesa->temp_attrib[i]; } - r300SelectVertexShader(rmesa); - r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, rmesa->selected_vp->error); + vp = r300SelectVertexShader(ctx); + if (!vp->translated) + r300TranslateVertexShader(vp); + + r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error); } fp = r300SelectFragmentShader(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 25c42814ec..326183bda4 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1027,7 +1027,7 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) } } -static void r300TranslateVertexShader(struct r300_vertex_program *vp) +void r300TranslateVertexShader(struct r300_vertex_program *vp) { struct prog_instruction *vpi = vp->Base->Base.Instructions; int i; @@ -1524,7 +1524,6 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, assert(prog->NumInstructions); vp->num_temporaries = prog->NumTemporaries; - r300TranslateVertexShader(vp); return vp; } @@ -1538,9 +1537,9 @@ static void add_outputs(struct r300_vertex_program_key *key, GLint vert) key->OutputsAdded |= 1 << vert; } -void r300SelectVertexShader(r300ContextPtr r300) +struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) { - GLcontext *ctx = ctx = r300->radeon.glCtx; + r300ContextPtr r300 = R300_CONTEXT(ctx); GLuint InputsRead; struct r300_vertex_program_key wanted_key = { 0 }; GLint i; @@ -1596,14 +1595,14 @@ void r300SelectVertexShader(r300ContextPtr r300) for (vp = vpc->progs; vp; vp = vp->next) if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { - r300->selected_vp = vp; - return; + return r300->selected_vp = vp; } vp = build_program(ctx, &wanted_key, &vpc->mesa_program, wpos_idx); vp->next = vpc->progs; vpc->progs = vp; - r300->selected_vp = vp; + + return r300->selected_vp = vp; } #define bump_vpu_count(ptr, new_count) do { \ diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index b552e3fb1b..2dab11c337 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -34,4 +34,8 @@ void r300SetupVertexProgram(r300ContextPtr rmesa); +struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx); + +void r300TranslateVertexShader(struct r300_vertex_program *vp); + #endif -- cgit v1.2.3 From 7360f83364b407e949529eeca9f8c421d2da3ded Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 4 Jul 2009 13:17:35 +0200 Subject: r300: move fragment program selection before vertex program selection Prepare for wpos and fogc handling rewrite. --- src/mesa/drivers/dri/r300/r300_state.c | 44 +++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 8c228ab5e7..344f021acf 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2035,7 +2035,6 @@ static void r300ResetHwState(r300ContextPtr r300) void r300UpdateShaders(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; - struct r300_fragment_program *fp; /* should only happenen once, just after context is created */ /* TODO: shouldn't we fallback to sw here? */ @@ -2044,21 +2043,34 @@ void r300UpdateShaders(r300ContextPtr rmesa) return; } - if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) { + { + struct r300_fragment_program *fp; + + fp = r300SelectFragmentShader(ctx); + if (!fp->translated) + r300TranslateFragmentShader(ctx, fp); + + r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); + } + + if (rmesa->options.hw_tcl_enabled) { struct r300_vertex_program *vp; - int i; - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - rmesa->temp_attrib[i] = - TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = - &rmesa->dummy_attrib[i]; - } - _tnl_UpdateFixedFunctionProgram(ctx); + if (rmesa->radeon.NewGLState) { + int i; + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + rmesa->temp_attrib[i] = + TNL_CONTEXT(ctx)->vb.AttribPtr[i]; + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = + &rmesa->dummy_attrib[i]; + } + + _tnl_UpdateFixedFunctionProgram(ctx); - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = - rmesa->temp_attrib[i]; + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = + rmesa->temp_attrib[i]; + } } vp = r300SelectVertexShader(ctx); @@ -2068,12 +2080,6 @@ void r300UpdateShaders(r300ContextPtr rmesa) r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error); } - fp = r300SelectFragmentShader(ctx); - if (!fp->translated) - r300TranslateFragmentShader(ctx, fp); - - r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); - r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); rmesa->radeon.NewGLState = 0; } -- cgit v1.2.3 From d1e4caa6e2b6a1e20feb97ae51703d5b4b18f70b Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 4 Jul 2009 14:57:42 +0200 Subject: r300: recalculate used inputs and outputs after dead code removal --- src/mesa/drivers/dri/r300/radeon_nqssadce.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index 4a2e1cba40..d2591fa1bd 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -264,6 +264,32 @@ static void process_instruction(struct nqssadce_state* s) } } +static void calculateInputsOutputs(struct gl_program *p) +{ + struct prog_instruction *inst; + int i, tmp; + GLuint InputsRead, OutputsWritten; + + inst = p->Instructions; + InputsRead = 0; + OutputsWritten = 0; + while (inst->Opcode != OPCODE_END) + { + tmp = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < tmp; ++i) { + if (inst->SrcReg[i].File == PROGRAM_INPUT) + InputsRead |= 1 << inst->SrcReg[i].Index; + } + + if (inst->DstReg.File == PROGRAM_OUTPUT) + OutputsWritten |= 1 << inst->DstReg.Index; + + ++inst; + } + + p->InputsRead = InputsRead; + p->OutputsWritten = OutputsWritten; +} void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) { @@ -280,4 +306,6 @@ void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce s.IP--; process_instruction(&s); } + + calculateInputsOutputs(p); } -- cgit v1.2.3 From df5fe747fa08f63b949ba0fd6628060831b562ec Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 5 Jul 2009 02:03:32 +0200 Subject: r300: bind vertex program to fragment program --- src/mesa/drivers/dri/r300/r300_context.h | 4 +- src/mesa/drivers/dri/r300/r300_draw.c | 4 +- src/mesa/drivers/dri/r300/r300_state.c | 4 +- src/mesa/drivers/dri/r300/r300_vertprog.c | 205 ++++++++++++++---------------- 4 files changed, 103 insertions(+), 114 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 44211a45b3..6f99e49050 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -409,9 +409,7 @@ struct r300_vertex_program { struct r300_vertex_program *next; struct r300_vertex_program_key { - GLuint InputsRead; - GLuint OutputsWritten; - GLuint OutputsAdded; + GLuint FpReads; } key; struct r300_vertex_shader_hw_code { diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 4e8b62f186..5420293b91 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -332,7 +332,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar { int i, tmp; - tmp = r300->selected_vp->key.InputsRead; + tmp = r300->selected_vp->Base->Base.InputsRead; i = 0; vbuf->num_attribs = 0; while (tmp) { @@ -428,7 +428,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, if (r300->fallback) return GL_FALSE; - r300SetupVAP(ctx, r300->selected_vp->key.InputsRead, r300->selected_vp->key.OutputsWritten); + r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten); r300UpdateShaderStates(r300); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 344f021acf..0f3198e792 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1456,7 +1456,7 @@ static void r300SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); @@ -1573,7 +1573,7 @@ static void r500SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 326183bda4..0b04830350 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -961,10 +961,14 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) { int i; int cur_reg; + GLuint OutputsWritten, InputsRead; + + OutputsWritten = vp->Base->Base.OutputsWritten; + InputsRead = vp->Base->Base.InputsRead; cur_reg = -1; for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (vp->key.InputsRead & (1 << i)) + if (InputsRead & (1 << i)) vp->inputs[i] = ++cur_reg; else vp->inputs[i] = -1; @@ -974,13 +978,13 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) for (i = 0; i < VERT_RESULT_MAX; i++) vp->outputs[i] = -1; - assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); + assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); - if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) { + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { vp->outputs[VERT_RESULT_HPOS] = cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; } @@ -990,39 +994,39 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) * pretend it does by skipping output index reg so the colors * get written into appropriate output vectors. */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { + if (OutputsWritten & (1 << VERT_RESULT_COL0)) { vp->outputs[VERT_RESULT_COL0] = cur_reg++; - } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) || - vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { + if (OutputsWritten & (1 << VERT_RESULT_COL1)) { vp->outputs[VERT_RESULT_COL1] = cur_reg++; - } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) || - vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { vp->outputs[VERT_RESULT_BFC0] = cur_reg++; - } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { vp->outputs[VERT_RESULT_BFC1] = cur_reg++; - } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { - if (vp->key.OutputsWritten & (1 << i)) { + if (OutputsWritten & (1 << i)) { vp->outputs[i] = cur_reg++; } } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { vp->outputs[VERT_RESULT_FOGC] = cur_reg++; } } @@ -1255,6 +1259,8 @@ static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, ++vpi; vpi->Opcode = OPCODE_END; + + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + vp->wpos_idx); } static void pos_as_texcoord(struct r300_vertex_program *vp, @@ -1448,6 +1454,63 @@ static void translateInsts(struct gl_program *prog) } } +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \ + OutputsAdded |= 1 << (vp_result); \ + count++; \ + } \ + } while (0) + +static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint OutputsAdded, FpReads; + int i, count; + + OutputsAdded = count = 0; + FpReads = r300->selected_fp->Base->InputsRead; + + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); + + for (i = 0; i < 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); + } + + /* Some outputs may be artificially added, to match the inputs of the fragment program. + * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by + * vertex program are undefined, so just use MOV [vertex_result], CONST[0] + */ + if (count > 0) { + struct prog_instruction *inst; + + _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); + inst = &prog->Instructions[prog->NumInstructions - 1 - count]; + + for (i = 0; i < VERT_RESULT_MAX; ++i) { + if (OutputsAdded & (1 << i)) { + inst->Opcode = OPCODE_MOV; + + inst->DstReg.File = PROGRAM_OUTPUT; + inst->DstReg.Index = i; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->DstReg.CondMask = COND_TR; + + inst->SrcReg[0].File = PROGRAM_CONSTANT; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++inst; + } + } + + prog->OutputsWritten |= OutputsAdded; + } +} + +#undef ADD_OUTPUT + static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program_key *wanted_key, const struct gl_vertex_program *mesa_vp, @@ -1477,42 +1540,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, pos_as_texcoord(vp, prog); } - /* Some outputs may be artificially added, to match the inputs of the fragment program. - * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by - * vertex program are undefined, so just use MOV [vertex_result], CONST[0] - */ - { - int i, count = 0; - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (vp->key.OutputsAdded & (1 << i)) { - ++count; - } - } - - if (count > 0) { - struct prog_instruction *inst; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); - inst = &prog->Instructions[prog->NumInstructions - 1 - count]; - - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (vp->key.OutputsAdded & (1 << i)) { - inst->Opcode = OPCODE_MOV; - - inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = i; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; - - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = 0; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++inst; - } - } - } - } + addArtificialOutputs(ctx, prog); translateInsts(prog); @@ -1528,76 +1556,39 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, return vp; } -static void add_outputs(struct r300_vertex_program_key *key, GLint vert) -{ - if (key->OutputsWritten & (1 << vert)) - return; - - key->OutputsWritten |= 1 << vert; - key->OutputsAdded |= 1 << vert; -} - struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - GLuint InputsRead; struct r300_vertex_program_key wanted_key = { 0 }; - GLint i; struct r300_vertex_program_cont *vpc; struct r300_vertex_program *vp; GLint wpos_idx; vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; - wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - - wpos_idx = -1; - if (InputsRead & FRAG_BIT_WPOS) { - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(InputsRead & (FRAG_BIT_TEX0 << i))) - break; + wanted_key.FpReads = r300->selected_fp->Base->InputsRead; - if (i == ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tno free texcoord found\n"); - _mesa_exit(-1); + for (vp = vpc->progs; vp; vp = vp->next) + if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) + == 0) { + return r300->selected_vp = vp; } - wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - wpos_idx = i; - } - - if (vpc->mesa_program.IsPositionInvariant) { - wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); - wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS); - } else { - add_outputs(&wanted_key, VERT_RESULT_HPOS); - } - - if (InputsRead & FRAG_BIT_COL0) { - add_outputs(&wanted_key, VERT_RESULT_COL0); - } + wpos_idx = -1; + if (wanted_key.FpReads & FRAG_BIT_WPOS) { + GLint i; - if (InputsRead & FRAG_BIT_COL1) { - add_outputs(&wanted_key, VERT_RESULT_COL1); - } + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (!(wanted_key.FpReads & (FRAG_BIT_TEX(i)))) + break; - if (InputsRead & FRAG_BIT_FOGC) { - add_outputs(&wanted_key, VERT_RESULT_FOGC); - } + if (i == ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tno free texcoord found\n"); + _mesa_exit(-1); + } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - add_outputs(&wanted_key, VERT_RESULT_TEX0 + i); - } + wpos_idx = i; } - for (vp = vpc->progs; vp; vp = vp->next) - if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) - == 0) { - return r300->selected_vp = vp; - } - vp = build_program(ctx, &wanted_key, &vpc->mesa_program, wpos_idx); vp->next = vpc->progs; vpc->progs = vp; @@ -1663,8 +1654,8 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); inst_count = (prog->hw_code.length / 4) - 1; - r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead), - _mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries); + r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead), + _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries); R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | -- cgit v1.2.3 From f79ef95df4f19124c24e59583bf9fb1e347d8f2b Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 5 Jul 2009 02:32:51 +0200 Subject: r300: rewrite FOGC and HPOS attribs handling Rewrite vertex and fragment programs so that we don't have to do any hacks on lower level. --- src/mesa/drivers/dri/r300/r300_context.h | 8 ++- src/mesa/drivers/dri/r300/r300_emit.c | 9 --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 62 ++++++++++++++++++- src/mesa/drivers/dri/r300/r300_shader.c | 3 +- src/mesa/drivers/dri/r300/r300_state.c | 53 ---------------- src/mesa/drivers/dri/r300/r300_swtcl.c | 38 +++++------- src/mesa/drivers/dri/r300/r300_vertprog.c | 79 ++++++++++++++---------- 7 files changed, 130 insertions(+), 122 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 6f99e49050..32104b8158 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -410,6 +410,8 @@ struct r300_vertex_program { struct r300_vertex_program_key { GLuint FpReads; + GLuint FogAttr; + GLuint WPosAttr; } key; struct r300_vertex_shader_hw_code { @@ -425,7 +427,6 @@ struct r300_vertex_program { int pos_end; int num_temporaries; /* Number of temp vars used by program */ - int wpos_idx; int inputs[VERT_ATTRIB_MAX]; int outputs[VERT_RESULT_MAX]; }; @@ -560,6 +561,11 @@ struct r300_fragment_program { GLuint optimization; struct r300_fragment_program *next; + + /* attribute that we are sending the WPOS in */ + gl_frag_attrib wpos_attr; + /* attribute that we are sending the fog coordinate in */ + gl_frag_attrib fog_attr; }; struct r300_fragment_program_cont { diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index c3817721dc..707d1284ed 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -116,15 +116,6 @@ GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) } } - if (fp_reads & FRAG_BIT_WPOS) { - ret |= (4 << (3 * first_free_texcoord)); - ++first_free_texcoord; - } - - if (vp_writes & (1 << VERT_RESULT_FOGC) && fp_reads & FRAG_BIT_FOGC) { - ret |= 4 << (3 * first_free_texcoord); - } - if (first_free_texcoord > 8) { fprintf(stderr, "\tout of free texcoords\n"); _mesa_exit(-1); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index b25cf24007..e90be9b7f8 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -69,8 +69,10 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) { GLuint InputsRead = compiler->fp->Base->InputsRead; - if (!(InputsRead & FRAG_BIT_WPOS)) + if (!(InputsRead & FRAG_BIT_WPOS)) { + compiler->fp->wpos_attr = FRAG_ATTRIB_MAX; return; + } static gl_state_index tokens[STATE_LENGTH] = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 @@ -78,10 +80,23 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) struct prog_instruction *fpi; GLuint window_index; int i = 0; + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(InputsRead & (1 << i))) { + InputsRead &= ~(1 << FRAG_ATTRIB_WPOS); + InputsRead |= 1 << i; + compiler->fp->Base->InputsRead = InputsRead; + compiler->fp->wpos_attr = i; + break; + } + } + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); _mesa_insert_instructions(compiler->program, 0, 3); fpi = compiler->program->Instructions; + i = 0; /* perspective divide */ fpi[i].Opcode = OPCODE_RCP; @@ -92,7 +107,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) fpi[i].DstReg.CondMask = COND_TR; fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; i++; @@ -104,7 +119,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) fpi[i].DstReg.CondMask = COND_TR; fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; @@ -147,6 +162,45 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) } } +static void rewriteFog(struct r300_fragment_program_compiler *compiler) +{ + struct r300_fragment_program *fp = compiler->fp; + GLuint InputsRead; + int i; + + InputsRead = fp->Base->InputsRead; + + if (!(InputsRead & FRAG_BIT_FOGC)) { + fp->fog_attr = FRAG_ATTRIB_MAX; + return; + } + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(InputsRead & (1 << i))) { + InputsRead &= ~(1 << FRAG_ATTRIB_FOGC); + InputsRead |= 1 << i; + fp->Base->InputsRead = InputsRead; + fp->fog_attr = i; + break; + } + } + + { + struct prog_instruction *inst; + + inst = compiler->program->Instructions; + while (inst->Opcode != OPCODE_END) { + const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < src_regs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) + inst->SrcReg[i].Index = fp->fog_attr; + } + ++inst; + } + } +} + static GLuint build_dtm(GLuint depthmode) { switch(depthmode) { @@ -204,6 +258,8 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *f insert_WPOS_trailer(&compiler); + rewriteFog(&compiler); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { struct radeon_program_transformation transformations[] = { { &r500_transform_TEX, &compiler }, diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 854eb5d80a..62228a3786 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -66,8 +66,7 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: vp = CALLOC_STRUCT(r300_vertex_program_cont); - return _mesa_init_vertex_program(ctx, &vp->mesa_program, - target, id); + return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 0f3198e792..bae2f0f3cb 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1519,29 +1519,6 @@ static void r300SetupRSUnit(GLcontext * ctx) ++fp_reg; } - if (InputsRead & FRAG_BIT_WPOS) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_WPOS; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } - - if (InputsRead & FRAG_BIT_FOGC) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0); - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count); - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_FOGC; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } else { - WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); - } - } - /* Setup default color if no color or tex was set */ if (rs_tex_count == 0 && col_ip == 0) { r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0); @@ -1640,36 +1617,6 @@ static void r500SetupRSUnit(GLcontext * ctx) ++fp_reg; } - if (InputsRead & FRAG_BIT_WPOS) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | - ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | - ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | - ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); - - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_WPOS; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } - - if (InputsRead & FRAG_BIT_FOGC) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); - - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_FOGC; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } else { - WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); - } - } - /* Setup default color if no color or tex was set */ if (rs_tex_count == 0 && col_ip == 0) { r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0); diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index db4ccce6f1..d73508d36e 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -192,31 +192,23 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ } } - /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */ - if (fp_reads & FRAG_BIT_WPOS) { - if (first_free_tex >= ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tout of free texcoords to write w pos\n"); - _mesa_exit(-1); - } + if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; - InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex); - OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex); - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); - ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW, 0); - ++first_free_tex; + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); } - if (fp_reads & FRAG_BIT_FOGC) { - if (first_free_tex >= ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); - _mesa_exit(-1); - } + if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; - InputsRead |= 1 << VERT_ATTRIB_FOG; - OutputsWritten |= 1 << VERT_RESULT_FOGC; - GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); - EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F ); - ADD_ATTR(VERT_ATTRIB_FOG, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0); + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); + } + + if (first_free_tex >= ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); + _mesa_exit(-1); } R300_NEWPRIM(rmesa); @@ -497,11 +489,13 @@ void r300RenderStart(GLcontext *ctx) r300ContextPtr rmesa = R300_CONTEXT( ctx ); r300ChooseRenderState(ctx); + + r300UpdateShaders(rmesa); + r300PrepareVertices(ctx); r300ValidateBuffers(ctx); - r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 0b04830350..6f9b4db40f 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1223,8 +1223,7 @@ void r300TranslateVertexShader(struct r300_vertex_program *vp) } } -static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, - GLuint temp_index) +static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) { struct prog_instruction *vpi; @@ -1248,7 +1247,7 @@ static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, vpi->Opcode = OPCODE_MOV; vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx; + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; vpi->DstReg.WriteMask = WRITEMASK_XYZW; vpi->DstReg.CondMask = COND_TR; @@ -1259,12 +1258,9 @@ static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, ++vpi; vpi->Opcode = OPCODE_END; - - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + vp->wpos_idx); } -static void pos_as_texcoord(struct r300_vertex_program *vp, - struct gl_program *prog) +static void pos_as_texcoord(struct gl_program *prog, int tex_id) { struct prog_instruction *vpi; GLuint tempregi = prog->NumTemporaries; @@ -1278,7 +1274,37 @@ static void pos_as_texcoord(struct r300_vertex_program *vp, } } - insert_wpos(vp, prog, tempregi); + insert_wpos(prog, tempregi, tex_id); + + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} + +/** + @TODO + We can put X001 swizzle only if input components are directly mapped from output components. + For some insts we need to skip source swizzles and add: MOV OUTPUT[fog_attr].yzw, CONST[0].0001 + */ +static void fog_as_texcoord(struct gl_program *prog, int tex_id) +{ + struct prog_instruction *vpi; + int i; + + vpi = prog->Instructions; + while (vpi->Opcode != OPCODE_END) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + + for (i = 0; i < _mesa_num_inst_src_regs(vpi->Opcode); ++i) { + vpi->SrcReg[i].Swizzle = combineSwizzles(vpi->SrcReg[i].Swizzle, SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + } + } + + ++vpi; + } + + prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); } static int translateABS(struct gl_program *prog, int pos) @@ -1513,16 +1539,15 @@ static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program_key *wanted_key, - const struct gl_vertex_program *mesa_vp, - GLint wpos_idx) + const struct gl_vertex_program *mesa_vp) { + r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program *vp; struct gl_program *prog; vp = _mesa_calloc(sizeof(*vp)); vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - vp->wpos_idx = wpos_idx; prog = &vp->Base->Base; @@ -1532,12 +1557,16 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, fflush(stdout); } - if (mesa_vp->IsPositionInvariant) { + if (vp->Base->IsPositionInvariant) { _mesa_insert_mvp_code(ctx, vp->Base); } - if (wpos_idx > -1) { - pos_as_texcoord(vp, prog); + if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { + pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0); + } + + if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { + fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0); } addArtificialOutputs(ctx, prog); @@ -1562,34 +1591,20 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) struct r300_vertex_program_key wanted_key = { 0 }; struct r300_vertex_program_cont *vpc; struct r300_vertex_program *vp; - GLint wpos_idx; vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; wanted_key.FpReads = r300->selected_fp->Base->InputsRead; + wanted_key.FogAttr = r300->selected_fp->fog_attr; + wanted_key.WPosAttr = r300->selected_fp->wpos_attr; - for (vp = vpc->progs; vp; vp = vp->next) + for (vp = vpc->progs; vp; vp = vp->next) { if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { return r300->selected_vp = vp; } - - wpos_idx = -1; - if (wanted_key.FpReads & FRAG_BIT_WPOS) { - GLint i; - - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(wanted_key.FpReads & (FRAG_BIT_TEX(i)))) - break; - - if (i == ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tno free texcoord found\n"); - _mesa_exit(-1); - } - - wpos_idx = i; } - vp = build_program(ctx, &wanted_key, &vpc->mesa_program, wpos_idx); + vp = build_program(ctx, &wanted_key, &vpc->mesa_program); vp->next = vpc->progs; vpc->progs = vp; -- cgit v1.2.3 From e43cc28c1b6face903f3c977d6eb887335bec886 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 4 Jul 2009 16:50:25 +0200 Subject: r300: move depth output rewrite out of NQSSADCE --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 48 ++++++++++++++++++++++-- src/mesa/drivers/dri/r300/radeon_nqssadce.c | 41 +------------------- src/mesa/drivers/dri/r300/radeon_nqssadce.h | 6 +-- 3 files changed, 46 insertions(+), 49 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index e90be9b7f8..d5ef18b2ad 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -239,6 +239,46 @@ static void build_state( } } +static void rewrite_depth_out(struct gl_program *prog) +{ + struct prog_instruction *inst; + + for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) { + if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH) + continue; + + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } + } +} + void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -260,6 +300,8 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *f rewriteFog(&compiler); + rewrite_depth_out(compiler.program); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { struct radeon_program_transformation transformations[] = { { &r500_transform_TEX, &compiler }, @@ -287,16 +329,14 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *f struct radeon_nqssadce_descr nqssadce = { .Init = &nqssadce_init, .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE + .BuildSwizzle = &r500FPBuildSwizzle }; radeonNqssaDce(ctx, compiler.program, &nqssadce); } else { struct radeon_nqssadce_descr nqssadce = { .Init = &nqssadce_init, .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE + .BuildSwizzle = &r300FPBuildSwizzle }; radeonNqssaDce(ctx, compiler.program, &nqssadce); } diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index d2591fa1bd..82dfc31dd0 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -56,7 +56,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil * * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. */ -static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) { struct prog_src_register tmp = srcreg; int i; @@ -121,40 +121,6 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, return inst; } - -static void rewrite_depth_out(struct prog_instruction *inst) -{ - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; - } else { - inst->DstReg.WriteMask = 0; - return; - } - - switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; - } -} - static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) { int nsrc = _mesa_num_inst_src_regs(inst->Opcode); @@ -189,11 +155,6 @@ static void process_instruction(struct nqssadce_state* s) return; if (inst->Opcode != OPCODE_KIL) { - if (s->Descr->RewriteDepthOut) { - if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH) - rewrite_depth_out(inst); - } - struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); if (!regstate) { _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h index a4f94abcb6..e3341692e4 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.h @@ -83,14 +83,10 @@ struct radeon_nqssadce_descr { */ void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - /** - * Rewrite instructions that write to DEPR.z to write to DEPR.w - * instead (rewriting is done *before* the WriteMask test). - */ - GLboolean RewriteDepthOut; void *Data; }; void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); #endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ -- cgit v1.2.3 From 96b2eb18c5059d441873bfa562bd2ff9dec46a51 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 4 Jul 2009 15:22:22 +0200 Subject: r300: handle ARB_vertex_program specific instructions in NQSSADCE --- src/mesa/drivers/dri/r300/radeon_nqssadce.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index 82dfc31dd0..c56598b4a5 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -188,6 +188,8 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_MAX: case OPCODE_MIN: case OPCODE_MUL: + case OPCODE_SGE: + case OPCODE_SLT: inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); break; @@ -219,6 +221,18 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_TXP: inst = track_used_srcreg(s, inst, 0, 0xf); break; + case OPCODE_DST: + inst = track_used_srcreg(s, inst, 0, 0x6); + inst = track_used_srcreg(s, inst, 1, 0xa); + break; + case OPCODE_EXP: + case OPCODE_LOG: + case OPCODE_POW: + inst = track_used_srcreg(s, inst, 0, 0x3); + break; + case OPCODE_LIT: + inst = track_used_srcreg(s, inst, 0, 0xb); + break; default: _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); return; -- cgit v1.2.3 From 12a6d73c7590c37ec8ae3f2c8c737791e4461d77 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Mon, 13 Jul 2009 19:23:18 +0200 Subject: r300: handle relative addressing in NQSSADCE --- src/mesa/drivers/dri/r300/radeon_nqssadce.c | 10 +++++++++- src/mesa/drivers/dri/r300/radeon_nqssadce.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index c56598b4a5..d917da58bb 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -46,6 +46,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil switch(file) { case PROGRAM_TEMPORARY: return &s->Temps[index]; case PROGRAM_OUTPUT: return &s->Outputs[index]; + case PROGRAM_ADDRESS: return &s->Address; default: return 0; } } @@ -114,7 +115,13 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, deswz_source = sourced; } - struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + struct register_state *regstate; + + if (inst->SrcReg[src].RelAddr) + regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); + else + regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + if (regstate) regstate->Sourced |= deswz_source & 0xf; @@ -178,6 +185,7 @@ static void process_instruction(struct nqssadce_state* s) * might change the instruction stream under us, so we have * to be careful with the inst pointer. */ switch (inst->Opcode) { + case OPCODE_ARL: case OPCODE_DDX: case OPCODE_DDY: case OPCODE_FRC: diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h index e3341692e4..8626f21c25 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.h @@ -58,6 +58,7 @@ struct nqssadce_state { */ struct register_state Temps[MAX_PROGRAM_TEMPS]; struct register_state Outputs[VERT_RESULT_MAX]; + struct register_state Address; }; -- cgit v1.2.3 From 70448b9f95b4ca56526458d207a28727f71e8d3c Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 4 Jul 2009 16:52:48 +0200 Subject: r300: operate on copy of a program when pairing instructions We need to keep unpaired program for vertex program NQSSADCE. --- src/mesa/drivers/dri/r300/radeon_program_pair.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c index 906d36e522..d6fb474cf2 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c @@ -870,7 +870,7 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, _mesa_bzero(&s, sizeof(s)); s.Ctx = ctx; - s.Program = program; + s.Program = _mesa_clone_program(ctx, program); s.Handler = handler; s.UserData = userdata; s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; @@ -904,6 +904,8 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, _mesa_free(s.ValuePool); _mesa_free(s.ReaderPool); + _mesa_reference_program(ctx, &s.Program, NULL); + return !s.Error; } -- cgit v1.2.3 From 65d9f23c7c3e99dfe038b1306f96930e1228de11 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 5 Jul 2009 02:34:48 +0200 Subject: r300: use NQSSADCE for vertex programs --- src/mesa/drivers/dri/r300/r300_vertprog.c | 133 +++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 6f9b4db40f..fb94eea07a 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -34,11 +34,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/program.h" #include "shader/programopt.h" #include "shader/prog_instruction.h" +#include "shader/prog_optimize.h" #include "shader/prog_parameter.h" #include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "tnl/tnl.h" +#include "radeon_nqssadce.h" #include "r300_context.h" #include "r300_state.h" @@ -1537,6 +1539,86 @@ static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) #undef ADD_OUTPUT +static GLuint getUsedComponents(const GLuint swizzle) +{ + GLuint ret; + + ret = 0; + + /* need to mask out ZERO, ONE and NIL swizzles */ + ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_X) & 0x3); + ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_Y) & 0x3); + ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_Z) & 0x3); + ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_W) & 0x3); + + return ret; +} + +static GLuint trackUsedComponents(const struct gl_program *prog, const GLuint attrib) +{ + struct prog_instruction *inst; + int tmp, i; + GLuint ret; + + inst = prog->Instructions; + ret = 0; + while (inst->Opcode != OPCODE_END) { + tmp = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < tmp; ++i) { + if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == attrib) { + ret |= getUsedComponents(inst->SrcReg[i].Swizzle); + } + } + ++inst; + } + + return ret; +} + +static void nqssadceInit(struct nqssadce_state* s) +{ + r300ContextPtr r300 = R300_CONTEXT(s->Ctx); + GLuint fp_reads; + + fp_reads = r300->selected_fp->Base->InputsRead; + { + GLuint tmp; + + if (fp_reads & FRAG_BIT_COL0) { + tmp = trackUsedComponents(r300->selected_fp->Base, FRAG_ATTRIB_COL0); + s->Outputs[VERT_RESULT_COL0].Sourced = tmp; + s->Outputs[VERT_RESULT_BFC0].Sourced = tmp; + } + + if (fp_reads & FRAG_BIT_COL1) { + tmp = trackUsedComponents(r300->selected_fp->Base, FRAG_ATTRIB_COL1); + s->Outputs[VERT_RESULT_COL1].Sourced = tmp; + s->Outputs[VERT_RESULT_BFC1].Sourced = tmp; + } + } + + { + int i; + for (i = 0; i < 8; ++i) { + if (fp_reads & FRAG_BIT_TEX(i)) { + s->Outputs[VERT_RESULT_TEX0 + i].Sourced = trackUsedComponents(r300->selected_fp->Base, FRAG_ATTRIB_TEX0 + i); + } + } + } + + s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; + if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) + s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; +} + +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +{ + (void) opcode; + (void) reg; + + return GL_TRUE; +} + static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program_key *wanted_key, const struct gl_vertex_program *mesa_vp) @@ -1579,8 +1661,57 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, fflush(stdout); } + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(ctx, prog, &nqssadce); + + /* We need this step for reusing temporary registers */ + _mesa_optimize_program(ctx, prog); + + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + _mesa_print_program(prog); + fflush(stdout); + } + } + assert(prog->NumInstructions); - vp->num_temporaries = prog->NumTemporaries; + { + struct prog_instruction *inst; + int max, i, tmp; + + inst = prog->Instructions; + max = -1; + while (inst->Opcode != OPCODE_END) { + tmp = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < tmp; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + if ((int) inst->SrcReg[i].Index > max) { + max = inst->SrcReg[i].Index; + } + } + } + + if (_mesa_num_inst_dst_regs(inst->Opcode)) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if ((int) inst->DstReg.Index > max) { + max = inst->DstReg.Index; + } + } + } + ++inst; + } + + /* We actually want highest index of used temporary register, + * not the number of temporaries used. + * These values aren't always the same. + */ + vp->num_temporaries = max + 1; + } return vp; } -- cgit v1.2.3 From bdc8a95fc906dcc617995479fbffbf8fdd1f4e34 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 4 Jul 2009 23:38:59 +0200 Subject: r300: removed unnecessary params We don't have check which attributes are used by fragment program - it's already done by NQSSADCE. --- src/mesa/drivers/dri/r300/r300_emit.c | 10 +++++----- src/mesa/drivers/dri/r300/r300_emit.h | 4 ++-- src/mesa/drivers/dri/r300/r300_state.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 707d1284ed..feb3370f37 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -81,17 +81,17 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) return vic_1; } -GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) +GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes) { GLuint ret = 0; if (vp_writes & (1 << VERT_RESULT_HPOS)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - if (vp_writes & (1 << VERT_RESULT_COL0) && fp_reads & FRAG_BIT_COL0) + if (vp_writes & (1 << VERT_RESULT_COL0)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT; - if (vp_writes & (1 << VERT_RESULT_COL1) && fp_reads & FRAG_BIT_COL1) + if (vp_writes & (1 << VERT_RESULT_COL1)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; /* Two sided lighting works only if all 4 colors are written */ @@ -105,12 +105,12 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) return ret; } -GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) +GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes) { GLuint i, ret = 0, first_free_texcoord = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (vp_writes & (1 << (VERT_RESULT_TEX0 + i)) && fp_reads & FRAG_BIT_TEX(i)) { + if (vp_writes & (1 << (VERT_RESULT_TEX0 + i))) { ret |= (4 << (3 * first_free_texcoord)); ++first_free_texcoord; } diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 2fb8b82d3a..3f8c60ffae 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -225,7 +225,7 @@ extern void r300EmitCacheFlush(r300ContextPtr rmesa); extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); -extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads); -extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads); +extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes); +extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes); #endif diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index bae2f0f3cb..4d504d14e3 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2225,8 +2225,8 @@ void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten, rmesa->selected_fp->Base->InputsRead); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten, rmesa->selected_fp->Base->InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); } void r300UpdateShaderStates(r300ContextPtr rmesa) -- cgit v1.2.3 From 4efb9f053c84fbdced373cc62fe06b3158b59015 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 5 Jul 2009 03:17:32 +0200 Subject: r300: fix WPOS for SWTCL --- src/mesa/drivers/dri/r300/r300_swtcl.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index d73508d36e..56ed519cf4 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -150,6 +150,22 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0); } + if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; + + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); + } + + if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; + + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); + } + /** * Sending only one texcoord component may lead to lock up, * so for all textures always output 4 texcoord components to RS. @@ -192,20 +208,6 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ } } - if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { - int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; - - VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; - RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); - } - - if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { - int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; - - VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; - RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); - } - if (first_free_tex >= ctx->Const.MaxTextureUnits) { fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); _mesa_exit(-1); -- cgit v1.2.3 From 4eff323731b0d65e1f2817e96435807418d833cc Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 15:09:20 +0200 Subject: r300: hw can handle per component negations in vertex shaders Reported-by: Nicolai Haehnle --- src/mesa/drivers/dri/r300/r300_vertprog.c | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index fb94eea07a..ea8d25d5a9 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1388,32 +1388,9 @@ static int translateSUB(struct gl_program *prog, int pos) static int translateSWZ(struct gl_program *prog, int pos) { - struct prog_instruction *inst; - GLuint orig_negate, orig_writemask; - - inst = &prog->Instructions[pos]; - orig_negate = inst->SrcReg[0].Negate; - orig_writemask = inst->DstReg.WriteMask; - - inst->Opcode = OPCODE_MOV; + prog->Instructions[pos].Opcode = OPCODE_MOV; - /* If all relevant components are either negated or not negated at the same time, we are ok. - */ - if ((orig_negate & orig_writemask) == 0 || (orig_negate & orig_writemask) == (NEGATE_XYZW & orig_writemask)) - return 0; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - inst->DstReg.WriteMask = orig_writemask & (orig_negate ^ NEGATE_XYZW); - inst->SrcReg[0].Negate = NEGATE_NONE; - ++inst; - - *inst = *(inst-1); - inst->DstReg.WriteMask = orig_writemask & orig_negate; - inst->SrcReg[0].Negate = NEGATE_XYZW; - - return 1; + return 0; } static int translateXPD(struct gl_program *prog, int pos) -- cgit v1.2.3 From 48cc352a71c728e0ce092dd7d7ec0945db304907 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 15:53:01 +0200 Subject: r300: fix StrideB == 0 case when converting data format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/r300_draw.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 5420293b91..9769ff5399 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -195,6 +195,11 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st } GLfloat *dst_ptr, *tmp; + + /* Convert value for first element only */ + if (input->StrideB == 0) + count = 1; + tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count); switch (input->Type) { @@ -228,7 +233,11 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st type = GL_FLOAT; r300_attr.free_needed = GL_TRUE; r300_attr.data = tmp; - r300_attr.stride = sizeof(GLfloat) * input->Size; + if (input->StrideB == 0) { + r300_attr.stride = 0; + } else { + r300_attr.stride = sizeof(GLfloat) * input->Size; + } r300_attr.dwords = input->Size; } else { type = input->Type; -- cgit v1.2.3 From ec854729d12a533ed00b9a9e6a5942aede2f5d1e Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 16:16:11 +0200 Subject: r300: fix indentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index d5ef18b2ad..c7914649dd 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -368,10 +368,10 @@ struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) fp = fp_list->progs; while (fp) { - if (_mesa_memcmp(&fp->state, &state, sizeof(state)) == 0) { - return r300->selected_fp = fp; - } - fp = fp->next; + if (_mesa_memcmp(&fp->state, &state, sizeof(state)) == 0) { + return r300->selected_fp = fp; + } + fp = fp->next; } fp = _mesa_calloc(sizeof(struct r300_fragment_program)); -- cgit v1.2.3 From a0204ce456435f6ee38d8c14d25ae251940bf55f Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 16:26:23 +0200 Subject: r300: document r300_fragment_program_cont struct --- src/mesa/drivers/dri/r300/r300_context.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 32104b8158..f40e8ac394 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -569,7 +569,12 @@ struct r300_fragment_program { }; struct r300_fragment_program_cont { + /* This is the unmodified fragment program mesa provided us with. + * We need to keep it unchanged because we may need to create another + * hw specific fragment program based on this + */ struct gl_fragment_program Base; + /* This is the list of hw specific fragment programs derived from Base */ struct r300_fragment_program *progs; }; -- cgit v1.2.3 From b3716eeb61b75dc661a1485f9bea9275a88a1d81 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 16:34:04 +0200 Subject: r300: document r300_vertex_program_cont structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/r300_context.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index f40e8ac394..f7af7d4e57 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -432,7 +432,12 @@ struct r300_vertex_program { }; struct r300_vertex_program_cont { - struct gl_vertex_program mesa_program; /* Must be first */ + /* This is the unmodified vertex program mesa provided us with. + * We need to keep it unchanged because we may need to create another + * hw specific vertex program based on this. + */ + struct gl_vertex_program mesa_program; + /* This is the list of hw specific vertex programs derived from mesa_program */ struct r300_vertex_program *progs; }; @@ -571,7 +576,7 @@ struct r300_fragment_program { struct r300_fragment_program_cont { /* This is the unmodified fragment program mesa provided us with. * We need to keep it unchanged because we may need to create another - * hw specific fragment program based on this + * hw specific fragment program based on this. */ struct gl_fragment_program Base; /* This is the list of hw specific fragment programs derived from Base */ -- cgit v1.2.3 From 1a5520fcd3842cc3198bff143d2af5c169eebc26 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 16:37:11 +0200 Subject: r300: move variables declarations --- src/mesa/drivers/dri/r300/radeon_nqssadce.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index d917da58bb..840c9733b1 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -250,7 +250,6 @@ static void process_instruction(struct nqssadce_state* s) static void calculateInputsOutputs(struct gl_program *p) { struct prog_instruction *inst; - int i, tmp; GLuint InputsRead, OutputsWritten; inst = p->Instructions; @@ -258,8 +257,10 @@ static void calculateInputsOutputs(struct gl_program *p) OutputsWritten = 0; while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { + int i, num_src_regs; + + num_src_regs = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < num_src_regs; ++i) { if (inst->SrcReg[i].File == PROGRAM_INPUT) InputsRead |= 1 << inst->SrcReg[i].Index; } -- cgit v1.2.3 From acd33600411102872af579edc4206b61eb51bb65 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 16:41:52 +0200 Subject: r300: minor fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split initializations becase the vars are of different type. Reported-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/r300_vertprog.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index ea8d25d5a9..703fba4874 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1473,7 +1473,8 @@ static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) GLuint OutputsAdded, FpReads; int i, count; - OutputsAdded = count = 0; + OutputsAdded = 0; + count = 0; FpReads = r300->selected_fp->Base->InputsRead; ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); -- cgit v1.2.3 From 3f5382819e31071c2af6cb39c1ca09bf3243f83f Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 19:10:58 +0200 Subject: r300: fix swizzle masking in getUsedComponents --- src/mesa/drivers/dri/r300/r300_vertprog.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 703fba4874..9f807dbc99 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1524,10 +1524,14 @@ static GLuint getUsedComponents(const GLuint swizzle) ret = 0; /* need to mask out ZERO, ONE and NIL swizzles */ - ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_X) & 0x3); - ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_Y) & 0x3); - ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_Z) & 0x3); - ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_W) & 0x3); + if (GET_SWZ(swizzle, SWIZZLE_X) <= SWIZZLE_W) + ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_X)); + if (GET_SWZ(swizzle, SWIZZLE_Y) <= SWIZZLE_W) + ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_Y)); + if (GET_SWZ(swizzle, SWIZZLE_Z) <= SWIZZLE_W) + ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_Z)); + if (GET_SWZ(swizzle, SWIZZLE_W) <= SWIZZLE_W) + ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_W)); return ret; } -- cgit v1.2.3 From f06910f6c34f199dc187bd69ff1f6889ba498217 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Sun, 12 Jul 2009 02:03:25 +0000 Subject: r300: Fix fogcoord rewriting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only care about the actual fogcoord itself now, reducing the rewriting done for the vertex program. The rewriting of source operand swizzles in the fragment program takes care that fogcoord.yzw = 001. This should fix fogcoord rewriting entirely, which had been horribly broken in the face of dot-product instructions, and just broken (though not horribly so) in the face of almost every other instruction (the W component would be incorrect for most arithmetic instructions). Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 14 ++++- src/mesa/drivers/dri/r300/r300_vertprog.c | 65 ++++++++---------------- src/mesa/drivers/dri/r300/radeon_program.h | 32 ++++++++++++ 3 files changed, 66 insertions(+), 45 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index c7914649dd..f5c4c0f4a0 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -162,6 +162,14 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) } } + +/** + * Rewrite fragment.fogcoord to use a texture coordinate slot. + * Note that fogcoord is forced into an X001 pattern, and this enforcement + * is done here. + * + * See also the counterpart rewriting for vertex programs. + */ static void rewriteFog(struct r300_fragment_program_compiler *compiler) { struct r300_fragment_program *fp = compiler->fp; @@ -193,8 +201,12 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler) while (inst->Opcode != OPCODE_END) { const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); for (i = 0; i < src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) + if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) { inst->SrcReg[i].Index = fp->fog_attr; + inst->SrcReg[i].Swizzle = combine_swizzles( + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE), + inst->SrcReg[i].Swizzle); + } } ++inst; } diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 9f807dbc99..c0b116156a 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -74,33 +74,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ } while (0) -static GLuint combineSwizzles(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) -{ - GLuint ret = 0; - - if (swz_x == SWIZZLE_ZERO || swz_x == SWIZZLE_ONE) - ret |= swz_x; - else - ret |= GET_SWZ(src, swz_x); - - if (swz_y == SWIZZLE_ZERO || swz_y == SWIZZLE_ONE) - ret |= swz_y << 3; - else - ret |= GET_SWZ(src, swz_y) << 3; - - if (swz_z == SWIZZLE_ZERO || swz_z == SWIZZLE_ONE) - ret |= swz_z << 6; - else - ret |= GET_SWZ(src, swz_z) << 6; - - if (swz_w == SWIZZLE_ZERO || swz_w == SWIZZLE_ONE) - ret |= swz_w << 9; - else - ret |= GET_SWZ(src, swz_w) << 9; - - return ret; -} - static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) { int pi; @@ -1282,24 +1255,28 @@ static void pos_as_texcoord(struct gl_program *prog, int tex_id) } /** - @TODO - We can put X001 swizzle only if input components are directly mapped from output components. - For some insts we need to skip source swizzles and add: MOV OUTPUT[fog_attr].yzw, CONST[0].0001 + * The fogcoord attribute is special in that only the first component + * is relevant, and the remaining components are always fixed (when read + * from by the fragment program) to yield an X001 pattern. + * + * We need to enforce this either in the vertex program or in the fragment + * program, and this code chooses not to enforce it in the vertex program. + * This is slightly cheaper, as long as the fragment program does not use + * weird swizzles. + * + * And it seems that usually, weird swizzles are not used, so... + * + * See also the counterpart rewriting for fragment programs. */ static void fog_as_texcoord(struct gl_program *prog, int tex_id) { struct prog_instruction *vpi; - int i; vpi = prog->Instructions; while (vpi->Opcode != OPCODE_END) { if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - - for (i = 0; i < _mesa_num_inst_src_regs(vpi->Opcode); ++i) { - vpi->SrcReg[i].Swizzle = combineSwizzles(vpi->SrcReg[i].Swizzle, SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); - } + vpi->DstReg.WriteMask = WRITEMASK_X; } ++vpi; @@ -1329,7 +1306,7 @@ static int translateDP3(struct gl_program *prog, int pos) inst = &prog->Instructions[pos]; inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combineSwizzles(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); return 0; } @@ -1341,7 +1318,7 @@ static int translateDPH(struct gl_program *prog, int pos) inst = &prog->Instructions[pos]; inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combineSwizzles(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); return 0; } @@ -1409,13 +1386,13 @@ static int translateXPD(struct gl_program *prog, int pos) inst->Opcode = OPCODE_MUL; inst->DstReg.File = PROGRAM_TEMPORARY; inst->DstReg.Index = tmp_idx; - inst->SrcReg[0].Swizzle = combineSwizzles(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combineSwizzles(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); ++inst; inst->Opcode = OPCODE_MAD; - inst->SrcReg[0].Swizzle = combineSwizzles(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combineSwizzles(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); inst->SrcReg[1].Negate ^= NEGATE_XYZW; inst->SrcReg[2].File = PROGRAM_TEMPORARY; inst->SrcReg[2].Index = tmp_idx; @@ -1768,12 +1745,12 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) struct r300_vertex_program *prog = rmesa->selected_vp; int inst_count = 0; int param_count = 0; - + /* Reset state, in case we don't use something */ ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; - + R300_STATECHANGE(rmesa, vpp); param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h index b411f69bc8..88474d43a2 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ b/src/mesa/drivers/dri/r300/radeon_program.h @@ -52,6 +52,38 @@ enum { #define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) #define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) +static inline GLuint get_swz(GLuint swz, GLuint idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) +{ + GLuint ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +static inline GLuint combine_swizzles(GLuint src, GLuint swz) +{ + GLuint ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; + + return ret; +} + + /** * Transformation context that is passed to local transformations. * -- cgit v1.2.3 From 582bd3466514b9fe24f18d99af2945f02709aacd Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 12 Jul 2009 15:09:15 +0200 Subject: r300: always assume all components are read by fragment program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Components of input attributes that are used by fragment program aren't part of vertex program key, and that may lead to situations when vertex program writes only TEX1.xy and fragment program reads TEX1.xyz, resulting in rendering errors. Reported-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/r300_vertprog.c | 54 +++---------------------------- 1 file changed, 5 insertions(+), 49 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index c0b116156a..de32013032 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1494,46 +1494,6 @@ static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) #undef ADD_OUTPUT -static GLuint getUsedComponents(const GLuint swizzle) -{ - GLuint ret; - - ret = 0; - - /* need to mask out ZERO, ONE and NIL swizzles */ - if (GET_SWZ(swizzle, SWIZZLE_X) <= SWIZZLE_W) - ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_X)); - if (GET_SWZ(swizzle, SWIZZLE_Y) <= SWIZZLE_W) - ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_Y)); - if (GET_SWZ(swizzle, SWIZZLE_Z) <= SWIZZLE_W) - ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_Z)); - if (GET_SWZ(swizzle, SWIZZLE_W) <= SWIZZLE_W) - ret |= 1 << (GET_SWZ(swizzle, SWIZZLE_W)); - - return ret; -} - -static GLuint trackUsedComponents(const struct gl_program *prog, const GLuint attrib) -{ - struct prog_instruction *inst; - int tmp, i; - GLuint ret; - - inst = prog->Instructions; - ret = 0; - while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == attrib) { - ret |= getUsedComponents(inst->SrcReg[i].Swizzle); - } - } - ++inst; - } - - return ret; -} - static void nqssadceInit(struct nqssadce_state* s) { r300ContextPtr r300 = R300_CONTEXT(s->Ctx); @@ -1541,18 +1501,14 @@ static void nqssadceInit(struct nqssadce_state* s) fp_reads = r300->selected_fp->Base->InputsRead; { - GLuint tmp; - if (fp_reads & FRAG_BIT_COL0) { - tmp = trackUsedComponents(r300->selected_fp->Base, FRAG_ATTRIB_COL0); - s->Outputs[VERT_RESULT_COL0].Sourced = tmp; - s->Outputs[VERT_RESULT_BFC0].Sourced = tmp; + s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; } if (fp_reads & FRAG_BIT_COL1) { - tmp = trackUsedComponents(r300->selected_fp->Base, FRAG_ATTRIB_COL1); - s->Outputs[VERT_RESULT_COL1].Sourced = tmp; - s->Outputs[VERT_RESULT_BFC1].Sourced = tmp; + s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; } } @@ -1560,7 +1516,7 @@ static void nqssadceInit(struct nqssadce_state* s) int i; for (i = 0; i < 8; ++i) { if (fp_reads & FRAG_BIT_TEX(i)) { - s->Outputs[VERT_RESULT_TEX0 + i].Sourced = trackUsedComponents(r300->selected_fp->Base, FRAG_ATTRIB_TEX0 + i); + s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; } } } -- cgit v1.2.3