diff options
author | Maciej Cencora <m.cencora@gmail.com> | 2009-07-13 20:29:11 +0200 |
---|---|---|
committer | Maciej Cencora <m.cencora@gmail.com> | 2009-07-13 20:29:11 +0200 |
commit | 9226e3d6a68e5e079456c5e7ba2a79e00a33bb89 (patch) | |
tree | a66968870e967e8e20f6203d04c3daed1a1bdbeb /src/mesa/drivers/dri | |
parent | 0dc700850acb81c7088ab740959441521f8d38d9 (diff) | |
parent | 582bd3466514b9fe24f18d99af2945f02709aacd (diff) |
Merge branch 'shaders_cleanup'
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.h | 39 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_draw.c | 15 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_emit.c | 19 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_emit.h | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_fragprog_common.c | 290 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_fragprog_common.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_shader.c | 64 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.c | 166 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_swtcl.c | 50 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_vertprog.c | 656 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_vertprog.h | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_nqssadce.c | 94 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_nqssadce.h | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_program.h | 32 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_program_pair.c | 4 |
17 files changed, 883 insertions, 570 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index b7d75426c5..6f3aab986d 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -418,8 +418,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); - _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); - /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 026c33c67c..f7af7d4e57 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -405,12 +405,13 @@ struct r300_hw_state { #undef TAG struct r300_vertex_program { + struct gl_vertex_program *Base; struct r300_vertex_program *next; struct r300_vertex_program_key { - GLuint InputsRead; - GLuint OutputsWritten; - GLuint OutputsAdded; + GLuint FpReads; + GLuint FogAttr; + GLuint WPosAttr; } key; struct r300_vertex_shader_hw_code { @@ -426,13 +427,17 @@ struct r300_vertex_program { int pos_end; int num_temporaries; /* Number of temp vars used by program */ - int wpos_idx; int inputs[VERT_ATTRIB_MAX]; int outputs[VERT_RESULT_MAX]; }; struct r300_vertex_program_cont { - struct gl_vertex_program mesa_program; /* Must be first */ + /* This is the unmodified vertex program mesa provided us with. + * We need to keep it unchanged because we may need to create another + * hw specific vertex program based on this. + */ + struct gl_vertex_program mesa_program; + /* This is the list of hw specific vertex programs derived from mesa_program */ struct r300_vertex_program *progs; }; @@ -546,7 +551,7 @@ struct r500_fragment_program_code { * to render with that program. */ struct r300_fragment_program { - struct gl_fragment_program Base; + struct gl_program *Base; GLboolean translated; GLboolean error; @@ -559,6 +564,23 @@ struct r300_fragment_program { GLboolean writes_depth; GLuint optimization; + + struct r300_fragment_program *next; + + /* attribute that we are sending the WPOS in */ + gl_frag_attrib wpos_attr; + /* attribute that we are sending the fog coordinate in */ + gl_frag_attrib fog_attr; +}; + +struct r300_fragment_program_cont { + /* This is the unmodified fragment program mesa provided us with. + * We need to keep it unchanged because we may need to create another + * hw specific fragment program based on this. + */ + struct gl_fragment_program Base; + /* This is the list of hw specific fragment programs derived from Base */ + struct r300_fragment_program *progs; }; struct r300_fragment_program_compiler { @@ -633,6 +655,7 @@ struct r300_context { struct r300_hw_state hw; struct r300_vertex_program *selected_vp; + struct r300_fragment_program *selected_fp; /* Vertex buffers */ @@ -664,11 +687,7 @@ extern GLboolean r300CreateContext(const __GLcontextModes * glVisual, __DRIcontextPrivate * driContextPriv, void *sharedContextPrivate); -extern void r300SelectVertexShader(r300ContextPtr r300); extern void r300InitShaderFuncs(struct dd_function_table *functions); -extern int r300VertexProgUpdateParams(GLcontext * ctx, - struct r300_vertex_program_cont *vp, - float *dst); extern void r300InitShaderFunctions(r300ContextPtr r300); diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 4e8b62f186..9769ff5399 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -195,6 +195,11 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st } GLfloat *dst_ptr, *tmp; + + /* Convert value for first element only */ + if (input->StrideB == 0) + count = 1; + tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count); switch (input->Type) { @@ -228,7 +233,11 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st type = GL_FLOAT; r300_attr.free_needed = GL_TRUE; r300_attr.data = tmp; - r300_attr.stride = sizeof(GLfloat) * input->Size; + if (input->StrideB == 0) { + r300_attr.stride = 0; + } else { + r300_attr.stride = sizeof(GLfloat) * input->Size; + } r300_attr.dwords = input->Size; } else { type = input->Type; @@ -332,7 +341,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar { int i, tmp; - tmp = r300->selected_vp->key.InputsRead; + tmp = r300->selected_vp->Base->Base.InputsRead; i = 0; vbuf->num_attribs = 0; while (tmp) { @@ -428,7 +437,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, if (r300->fallback) return GL_FALSE; - r300SetupVAP(ctx, r300->selected_vp->key.InputsRead, r300->selected_vp->key.OutputsWritten); + r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten); r300UpdateShaderStates(r300); diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index c3817721dc..feb3370f37 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -81,17 +81,17 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) return vic_1; } -GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) +GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes) { GLuint ret = 0; if (vp_writes & (1 << VERT_RESULT_HPOS)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - if (vp_writes & (1 << VERT_RESULT_COL0) && fp_reads & FRAG_BIT_COL0) + if (vp_writes & (1 << VERT_RESULT_COL0)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT; - if (vp_writes & (1 << VERT_RESULT_COL1) && fp_reads & FRAG_BIT_COL1) + if (vp_writes & (1 << VERT_RESULT_COL1)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; /* Two sided lighting works only if all 4 colors are written */ @@ -105,26 +105,17 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) return ret; } -GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) +GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes) { GLuint i, ret = 0, first_free_texcoord = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (vp_writes & (1 << (VERT_RESULT_TEX0 + i)) && fp_reads & FRAG_BIT_TEX(i)) { + if (vp_writes & (1 << (VERT_RESULT_TEX0 + i))) { ret |= (4 << (3 * first_free_texcoord)); ++first_free_texcoord; } } - if (fp_reads & FRAG_BIT_WPOS) { - ret |= (4 << (3 * first_free_texcoord)); - ++first_free_texcoord; - } - - if (vp_writes & (1 << VERT_RESULT_FOGC) && fp_reads & FRAG_BIT_FOGC) { - ret |= 4 << (3 * first_free_texcoord); - } - if (first_free_texcoord > 8) { fprintf(stderr, "\tout of free texcoords\n"); _mesa_exit(-1); diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 2fb8b82d3a..3f8c60ffae 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -225,7 +225,7 @@ extern void r300EmitCacheFlush(r300ContextPtr rmesa); extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); -extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads); -extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads); +extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes); +extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes); #endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index abc8757ba1..f5c4c0f4a0 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -50,13 +50,6 @@ #include "radeon_program.h" #include "radeon_program_alu.h" -static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) -{ - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (fp->Base.Parameters) - _mesa_load_state_parameters(ctx, fp->Base.Parameters); -} - static void nqssadce_init(struct nqssadce_state* s) { s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; @@ -74,10 +67,12 @@ static void nqssadce_init(struct nqssadce_state* s) */ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) { - GLuint InputsRead = compiler->fp->Base.Base.InputsRead; + GLuint InputsRead = compiler->fp->Base->InputsRead; - if (!(InputsRead & FRAG_BIT_WPOS)) + if (!(InputsRead & FRAG_BIT_WPOS)) { + compiler->fp->wpos_attr = FRAG_ATTRIB_MAX; return; + } static gl_state_index tokens[STATE_LENGTH] = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 @@ -85,10 +80,23 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) struct prog_instruction *fpi; GLuint window_index; int i = 0; + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(InputsRead & (1 << i))) { + InputsRead &= ~(1 << FRAG_ATTRIB_WPOS); + InputsRead |= 1 << i; + compiler->fp->Base->InputsRead = InputsRead; + compiler->fp->wpos_attr = i; + break; + } + } + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); _mesa_insert_instructions(compiler->program, 0, 3); fpi = compiler->program->Instructions; + i = 0; /* perspective divide */ fpi[i].Opcode = OPCODE_RCP; @@ -99,7 +107,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) fpi[i].DstReg.CondMask = COND_TR; fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; i++; @@ -111,7 +119,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) fpi[i].DstReg.CondMask = COND_TR; fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; @@ -154,6 +162,57 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) } } + +/** + * Rewrite fragment.fogcoord to use a texture coordinate slot. + * Note that fogcoord is forced into an X001 pattern, and this enforcement + * is done here. + * + * See also the counterpart rewriting for vertex programs. + */ +static void rewriteFog(struct r300_fragment_program_compiler *compiler) +{ + struct r300_fragment_program *fp = compiler->fp; + GLuint InputsRead; + int i; + + InputsRead = fp->Base->InputsRead; + + if (!(InputsRead & FRAG_BIT_FOGC)) { + fp->fog_attr = FRAG_ATTRIB_MAX; + return; + } + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(InputsRead & (1 << i))) { + InputsRead &= ~(1 << FRAG_ATTRIB_FOGC); + InputsRead |= 1 << i; + fp->Base->InputsRead = InputsRead; + fp->fog_attr = i; + break; + } + } + + { + struct prog_instruction *inst; + + inst = compiler->program->Instructions; + while (inst->Opcode != OPCODE_END) { + const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < src_regs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) { + inst->SrcReg[i].Index = fp->fog_attr; + inst->SrcReg[i].Swizzle = combine_swizzles( + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE), + inst->SrcReg[i].Swizzle); + } + } + ++inst; + } + } +} + static GLuint build_dtm(GLuint depthmode) { switch(depthmode) { @@ -175,7 +234,7 @@ static GLuint build_func(GLuint comparefunc) */ static void build_state( r300ContextPtr r300, - struct r300_fragment_program *fp, + struct gl_fragment_program *fp, struct r300_fragment_program_external_state *state) { int unit; @@ -183,7 +242,7 @@ static void build_state( _mesa_bzero(state, sizeof(*state)); for(unit = 0; unit < 16; ++unit) { - if (fp->Base.Base.ShadowSamplers & (1 << unit)) { + if (fp->Base.ShadowSamplers & (1 << unit)) { struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); @@ -192,100 +251,149 @@ static void build_state( } } -void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) +static void rewrite_depth_out(struct gl_program *prog) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; - struct r300_fragment_program_external_state state; + struct prog_instruction *inst; - build_state(r300, r300_fp, &state); - if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { - /* TODO: cache compiled programs */ - r300_fp->translated = GL_FALSE; - _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); - } - - if (!r300_fp->translated) { - struct r300_fragment_program_compiler compiler; + for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) { + if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH) + continue; - compiler.r300 = r300; - compiler.fp = r300_fp; - compiler.code = &r300_fp->code; - compiler.program = _mesa_clone_program(ctx, &fp->Base); + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } - if (RADEON_DEBUG & DEBUG_PIXEL) { - fflush(stdout); - _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; } + } +} - insert_WPOS_trailer(&compiler); +void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program_compiler compiler; + + compiler.r300 = r300; + compiler.fp = fp; + compiler.code = &fp->code; + compiler.program = fp->Base; + + if (RADEON_DEBUG & DEBUG_PIXEL) { + fflush(stdout); + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_program_transformation transformations[] = { - { &r500_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 4, transformations); - } else { - struct radeon_program_transformation transformations[] = { - { &r300_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 3, transformations); - } + insert_WPOS_trailer(&compiler); + + rewriteFog(&compiler); + + rewrite_depth_out(compiler.program); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 3, transformations); + } - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } - if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) - r300_fp->error = GL_TRUE; + if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) + fp->error = GL_TRUE; - /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->Base.Parameters); - fp->Base.Parameters = compiler.program->Parameters; - compiler.program->Parameters = 0; + fp->translated = GL_TRUE; - _mesa_reference_program(ctx, &compiler.program, NULL); + if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300->vtbl.FragmentProgramDump(&fp->code); +} - r300_fp->translated = GL_TRUE; +struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program_cont *fp_list; + struct r300_fragment_program *fp; + struct r300_fragment_program_external_state state; - r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + fp_list = (struct r300_fragment_program_cont *)ctx->FragmentProgram._Current; + build_state(r300, ctx->FragmentProgram._Current, &state); - if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300->vtbl.FragmentProgramDump(&r300_fp->code); + fp = fp_list->progs; + while (fp) { + if (_mesa_memcmp(&fp->state, &state, sizeof(state)) == 0) { + return r300->selected_fp = fp; + } + fp = fp->next; } - update_params(ctx, fp); + fp = _mesa_calloc(sizeof(struct r300_fragment_program)); + + fp->state = state; + fp->translated = GL_FALSE; + fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base); + + fp->next = fp_list->progs; + fp_list->progs = fp; + + return r300->selected_fp = fp; } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h index 85ea86fecb..5e103ee408 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h @@ -30,6 +30,10 @@ #include "main/mtypes.h" -extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); +#include "r300_context.h" + +extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp); + +struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 0133b83796..62228a3786 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -32,22 +32,45 @@ #include "r300_context.h" #include "r300_fragprog_common.h" +static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont *cache) +{ + struct r300_fragment_program *tmp, *fp = cache->progs; + + while (fp) { + tmp = fp->next; + _mesa_reference_program(ctx, &fp->Base, NULL); + _mesa_free(fp); + fp = tmp; + } +} + +static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *cache) +{ + struct r300_vertex_program *tmp, *vp = cache->progs; + + while (vp) { + tmp = vp->next; + _mesa_reference_vertprog(ctx, &vp->Base, NULL); + _mesa_free(vp); + vp = tmp; + } +} + static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, GLuint id) { struct r300_vertex_program_cont *vp; - struct r300_fragment_program *fp; + struct r300_fragment_program_cont *fp; switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: vp = CALLOC_STRUCT(r300_vertex_program_cont); - return _mesa_init_vertex_program(ctx, &vp->mesa_program, - target, id); + return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: - fp = CALLOC_STRUCT(r300_fragment_program); + fp = CALLOC_STRUCT(r300_fragment_program_cont); return _mesa_init_fragment_program(ctx, &fp->Base, target, id); default: @@ -59,21 +82,35 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) { + struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog; + struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog; + + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: + freeVertProgCache(ctx, vp); + break; + case GL_FRAGMENT_PROGRAM_ARB: + freeFragProgCache(ctx, fp); + break; + } + _mesa_delete_program(ctx, prog); } static void r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { - struct r300_vertex_program_cont *vp = (void *)prog; - struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; + struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog; + struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog; switch (target) { case GL_VERTEX_PROGRAM_ARB: + freeVertProgCache(ctx, vp); vp->progs = NULL; break; case GL_FRAGMENT_PROGRAM_ARB: - r300_fp->translated = GL_FALSE; + freeFragProgCache(ctx, fp); + fp->progs = NULL; break; } @@ -85,13 +122,18 @@ static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { - struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; + struct r300_fragment_program *fp = r300SelectFragmentShader(ctx); if (!fp->translated) - r300TranslateFragmentShader(ctx, &fp->Base); + r300TranslateFragmentShader(ctx, fp); return !fp->error; - } else - return GL_TRUE; + } else { + struct r300_vertex_program *vp = r300SelectVertexShader(ctx); + if (!vp->translated) + r300TranslateVertexShader(vp); + + return !vp->error; + } } void r300InitShaderFuncs(struct dd_function_table *functions) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index c0eda977db..4d504d14e3 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -452,9 +452,9 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + r300ContextPtr r300 = R300_CONTEXT(ctx); - return (fp && fp->writes_depth); + return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth; } static void r300SetEarlyZState(GLcontext * ctx) @@ -1093,24 +1093,25 @@ r300FetchStateParameter(GLcontext * ctx, * Update R300's own internal state parameters. * For now just STATE_R300_WINDOW_DIMENSION */ -void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) +static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) { - struct r300_fragment_program *fp; + r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_program_parameter_list *paramList; GLuint i; if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; - fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; - if (!fp) + if (!ctx->FragmentProgram._Current || !rmesa->selected_fp) return; - paramList = fp->Base.Base.Parameters; + paramList = rmesa->selected_fp->Base->Parameters; if (!paramList) return; + _mesa_load_state_parameters(ctx, paramList); + for (i = 0; i < paramList->NumParameters; i++) { if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { r300FetchStateParameter(ctx, @@ -1225,8 +1226,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - struct r300_fragment_program_code *code = &fp->code.r300; + struct r300_fragment_program_code *code = &r300->selected_fp->code.r300; R300_STATECHANGE(r300, fpt); @@ -1266,9 +1266,9 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { + r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - struct r500_fragment_program_code *code = &fp->code.r500; + struct r500_fragment_program_code *code = &r300->selected_fp->code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -1316,8 +1316,6 @@ static void r300SetupTextures(GLcontext * ctx) int hw_tmu = 0; int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */ int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, }; - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; R300_STATECHANGE(r300, txe); R300_STATECHANGE(r300, tex.filter); @@ -1420,7 +1418,7 @@ static void r300SetupTextures(GLcontext * ctx) cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - if (fp->Base.UsesKill && last_hw_tmu < 0) { + if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { // The KILL operation requires the first texture unit // to be enabled. r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; @@ -1458,11 +1456,11 @@ static void r300SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + InputsRead = r300->selected_fp->Base->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1521,29 +1519,6 @@ static void r300SetupRSUnit(GLcontext * ctx) ++fp_reg; } - if (InputsRead & FRAG_BIT_WPOS) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_WPOS; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } - - if (InputsRead & FRAG_BIT_FOGC) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0); - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count); - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_FOGC; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } else { - WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); - } - } - /* Setup default color if no color or tex was set */ if (rs_tex_count == 0 && col_ip == 0) { r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0); @@ -1575,11 +1550,11 @@ static void r500SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + InputsRead = r300->selected_fp->Base->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1642,36 +1617,6 @@ static void r500SetupRSUnit(GLcontext * ctx) ++fp_reg; } - if (InputsRead & FRAG_BIT_WPOS) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | - ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | - ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | - ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); - - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_WPOS; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } - - if (InputsRead & FRAG_BIT_FOGC) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); - - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_FOGC; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } else { - WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); - } - } - /* Setup default color if no color or tex was set */ if (rs_tex_count == 0 && col_ip == 0) { r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0); @@ -2036,43 +1981,51 @@ static void r300ResetHwState(r300ContextPtr r300) void r300UpdateShaders(r300ContextPtr rmesa) { - GLcontext *ctx; - struct r300_fragment_program *fp; - int i; - - ctx = rmesa->radeon.glCtx; - fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + GLcontext *ctx = rmesa->radeon.glCtx; /* should only happenen once, just after context is created */ /* TODO: shouldn't we fallback to sw here? */ - if (!fp) { + if (!ctx->FragmentProgram._Current) { _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); return; } - if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) { - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - rmesa->temp_attrib[i] = - TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = - &rmesa->dummy_attrib[i]; - } + { + struct r300_fragment_program *fp; - _tnl_UpdateFixedFunctionProgram(ctx); + fp = r300SelectFragmentShader(ctx); + if (!fp->translated) + r300TranslateFragmentShader(ctx, fp); - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = - rmesa->temp_attrib[i]; - } - - r300SelectVertexShader(rmesa); - r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, rmesa->selected_vp->error); + r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); } - if (!fp->translated || rmesa->radeon.NewGLState) - r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); + if (rmesa->options.hw_tcl_enabled) { + struct r300_vertex_program *vp; + + if (rmesa->radeon.NewGLState) { + int i; + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + rmesa->temp_attrib[i] = + TNL_CONTEXT(ctx)->vb.AttribPtr[i]; + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = + &rmesa->dummy_attrib[i]; + } + + _tnl_UpdateFixedFunctionProgram(ctx); + + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = + rmesa->temp_attrib[i]; + } + } + + vp = r300SelectVertexShader(ctx); + if (!vp->translated) + r300TranslateVertexShader(vp); - r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); + r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error); + } r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); rmesa->radeon.NewGLState = 0; @@ -2102,7 +2055,7 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, static void r300SetupPixelShader(GLcontext *ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r300_fragment_program *fp = rmesa->selected_fp; struct r300_fragment_program_code *code; int i, k; @@ -2148,8 +2101,7 @@ static void r300SetupPixelShader(GLcontext *ctx) R300_STATECHANGE(rmesa, fpp); rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->Base.Base, code->constant[i]); + const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); @@ -2174,7 +2126,7 @@ static void r300SetupPixelShader(GLcontext *ctx) static void r500SetupPixelShader(GLcontext *ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r300_fragment_program *fp = rmesa->selected_fp; int i; struct r500_fragment_program_code *code; @@ -2210,8 +2162,7 @@ static void r500SetupPixelShader(GLcontext *ctx) R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->Base.Base, code->constant[i]); + const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); @@ -2274,20 +2225,17 @@ void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); } void r300UpdateShaderStates(r300ContextPtr rmesa) { GLcontext *ctx; ctx = rmesa->radeon.glCtx; - struct r300_fragment_program *r300_fp; - - r300_fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; /* should only happenen once, just after context is created */ - if (!r300_fp) + if (!ctx->FragmentProgram._Current) return; r300SetEarlyZState(ctx); @@ -2323,8 +2271,6 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) R300_STATECHANGE(r300, cb); } - r300UpdateStateParameters(ctx, new_state); - r300->radeon.NewGLState |= new_state; } diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index 2328289420..d46bf9f179 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -52,7 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. void r300UpdateViewportOffset (GLcontext * ctx); void r300UpdateDrawBuffer (GLcontext * ctx); -void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state); void r300UpdateShaders (r300ContextPtr rmesa); void r300UpdateShaderStates (r300ContextPtr rmesa); void r300InitState (r300ContextPtr r300); diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index ce4179208e..56ed519cf4 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -76,7 +76,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ GLuint InputsRead = 0; GLuint OutputsWritten = 0; int num_attrs = 0; - GLuint fp_reads = ctx->FragmentProgram._Current->Base.InputsRead; + GLuint fp_reads = rmesa->selected_fp->Base->InputsRead; struct vertex_attribute *attrs = rmesa->vbuf.attribs; rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; @@ -150,6 +150,22 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0); } + if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; + + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); + } + + if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; + + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); + } + /** * Sending only one texcoord component may lead to lock up, * so for all textures always output 4 texcoord components to RS. @@ -192,31 +208,9 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ } } - /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */ - if (fp_reads & FRAG_BIT_WPOS) { - if (first_free_tex >= ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tout of free texcoords to write w pos\n"); - _mesa_exit(-1); - } - - InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex); - OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex); - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); - ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW, 0); - ++first_free_tex; - } - - if (fp_reads & FRAG_BIT_FOGC) { - if (first_free_tex >= ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); - _mesa_exit(-1); - } - - InputsRead |= 1 << VERT_ATTRIB_FOG; - OutputsWritten |= 1 << VERT_RESULT_FOGC; - GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); - EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F ); - ADD_ATTR(VERT_ATTRIB_FOG, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0); + if (first_free_tex >= ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); + _mesa_exit(-1); } R300_NEWPRIM(rmesa); @@ -497,11 +491,13 @@ void r300RenderStart(GLcontext *ctx) r300ContextPtr rmesa = R300_CONTEXT( ctx ); r300ChooseRenderState(ctx); + + r300UpdateShaders(rmesa); + r300PrepareVertices(ctx); r300ValidateBuffers(ctx); - r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index c41a8fdd62..de32013032 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -32,12 +32,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/macros.h" #include "main/enums.h" #include "shader/program.h" +#include "shader/programopt.h" #include "shader/prog_instruction.h" +#include "shader/prog_optimize.h" #include "shader/prog_parameter.h" #include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "tnl/tnl.h" +#include "radeon_nqssadce.h" #include "r300_context.h" #include "r300_state.h" @@ -71,15 +74,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ } while (0) -int r300VertexProgUpdateParams(GLcontext * ctx, - struct r300_vertex_program_cont *vp, float *dst) +static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) { int pi; - struct gl_vertex_program *mesa_vp = &vp->mesa_program; float *dst_o = dst; struct gl_program_parameter_list *paramList; - if (mesa_vp->IsNVProgram) { + if (vp->IsNVProgram) { _mesa_load_tracked_matrices(ctx); for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { @@ -91,16 +92,18 @@ int r300VertexProgUpdateParams(GLcontext * ctx, return dst - dst_o; } - assert(mesa_vp->Base.Parameters); - _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); + if (!vp->Base.Parameters) + return 0; - if (mesa_vp->Base.Parameters->NumParameters * 4 > + _mesa_load_state_parameters(ctx, vp->Base.Parameters); + + if (vp->Base.Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH) { fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); _mesa_exit(-1); } - paramList = mesa_vp->Base.Parameters; + paramList = vp->Base.Parameters; for (pi = 0; pi < paramList->NumParameters; pi++) { switch (paramList->Parameters[pi].Type) { case PROGRAM_STATE_VAR: @@ -933,10 +936,14 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) { int i; int cur_reg; + GLuint OutputsWritten, InputsRead; + + OutputsWritten = vp->Base->Base.OutputsWritten; + InputsRead = vp->Base->Base.InputsRead; cur_reg = -1; for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (vp->key.InputsRead & (1 << i)) + if (InputsRead & (1 << i)) vp->inputs[i] = ++cur_reg; else vp->inputs[i] = -1; @@ -946,13 +953,13 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) for (i = 0; i < VERT_RESULT_MAX; i++) vp->outputs[i] = -1; - assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); + assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); - if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) { + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { vp->outputs[VERT_RESULT_HPOS] = cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; } @@ -962,46 +969,46 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) * pretend it does by skipping output index reg so the colors * get written into appropriate output vectors. */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { + if (OutputsWritten & (1 << VERT_RESULT_COL0)) { vp->outputs[VERT_RESULT_COL0] = cur_reg++; - } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) || - vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { + if (OutputsWritten & (1 << VERT_RESULT_COL1)) { vp->outputs[VERT_RESULT_COL1] = cur_reg++; - } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) || - vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { vp->outputs[VERT_RESULT_BFC0] = cur_reg++; - } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { vp->outputs[VERT_RESULT_BFC1] = cur_reg++; - } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { - if (vp->key.OutputsWritten & (1 << i)) { + if (OutputsWritten & (1 << i)) { vp->outputs[i] = cur_reg++; } } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { vp->outputs[VERT_RESULT_FOGC] = cur_reg++; } } -static void r300TranslateVertexShader(struct r300_vertex_program *vp, - struct prog_instruction *vpi) +void r300TranslateVertexShader(struct r300_vertex_program *vp) { + struct prog_instruction *vpi = vp->Base->Base.Instructions; int i; GLuint *inst; unsigned long num_operands; @@ -1191,313 +1198,463 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, } } -/* DP4 version seems to trigger some hw peculiarity */ -//#define PREFER_DP4 +static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) +{ + struct prog_instruction *vpi; + + _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); + + vpi = &prog->Instructions[prog->NumInstructions - 3]; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_HPOS; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_MOV; -static void position_invariant(struct gl_program *prog) + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_END; +} + +static void pos_as_texcoord(struct gl_program *prog, int tex_id) { struct prog_instruction *vpi; - struct gl_program_parameter_list *paramList; - int i; + GLuint tempregi = prog->NumTemporaries; - gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 }; + prog->NumTemporaries++; - /* tokens[4] = matrix modifier */ -#ifdef PREFER_DP4 - tokens[4] = 0; /* not transposed or inverted */ -#else - tokens[4] = STATE_MATRIX_TRANSPOSE; -#endif - paramList = prog->Parameters; - - vpi = _mesa_alloc_instructions(prog->NumInstructions + 4); - _mesa_init_instructions(vpi, prog->NumInstructions + 4); - - for (i = 0; i < 4; i++) { - GLint idx; - tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */ - idx = _mesa_add_state_reference(paramList, tokens); -#ifdef PREFER_DP4 - vpi[i].Opcode = OPCODE_DP4; - vpi[i].StringPos = 0; - vpi[i].Data = 0; - - vpi[i].DstReg.File = PROGRAM_OUTPUT; - vpi[i].DstReg.Index = VERT_RESULT_HPOS; - vpi[i].DstReg.WriteMask = 1 << i; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; -#else - if (i == 0) - vpi[i].Opcode = OPCODE_MUL; - else - vpi[i].Opcode = OPCODE_MAD; + for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = tempregi; + } + } - vpi[i].Data = 0; + insert_wpos(prog, tempregi, tex_id); - if (i == 3) - vpi[i].DstReg.File = PROGRAM_OUTPUT; - else - vpi[i].DstReg.File = PROGRAM_TEMPORARY; - vpi[i].DstReg.Index = 0; - vpi[i].DstReg.WriteMask = 0xf; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); - - if (i > 0) { - vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; - vpi[i].SrcReg[2].Index = 0; - vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} + +/** + * The fogcoord attribute is special in that only the first component + * is relevant, and the remaining components are always fixed (when read + * from by the fragment program) to yield an X001 pattern. + * + * We need to enforce this either in the vertex program or in the fragment + * program, and this code chooses not to enforce it in the vertex program. + * This is slightly cheaper, as long as the fragment program does not use + * weird swizzles. + * + * And it seems that usually, weird swizzles are not used, so... + * + * See also the counterpart rewriting for fragment programs. + */ +static void fog_as_texcoord(struct gl_program *prog, int tex_id) +{ + struct prog_instruction *vpi; + + vpi = prog->Instructions; + while (vpi->Opcode != OPCODE_END) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_X; } -#endif + + ++vpi; } - _mesa_copy_instructions(&vpi[i], prog->Instructions, - prog->NumInstructions); + prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} - free(prog->Instructions); +static int translateABS(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; - prog->Instructions = vpi; + inst = &prog->Instructions[pos]; - prog->NumInstructions += 4; - vpi = &prog->Instructions[prog->NumInstructions - 1]; + inst->Opcode = OPCODE_MAX; + inst->SrcReg[1] = inst->SrcReg[0]; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; - assert(vpi->Opcode == OPCODE_END); + return 0; } -static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, - GLuint temp_index) +static int translateDP3(struct gl_program *prog, int pos) { - struct prog_instruction *vpi; - struct prog_instruction *vpi_insert; - int i = 0; + struct prog_instruction *inst; - vpi = _mesa_alloc_instructions(prog->NumInstructions + 2); - _mesa_init_instructions(vpi, prog->NumInstructions + 2); - /* all but END */ - _mesa_copy_instructions(vpi, prog->Instructions, - prog->NumInstructions - 1); - /* END */ - _mesa_copy_instructions(&vpi[prog->NumInstructions + 1], - &prog->Instructions[prog->NumInstructions - 1], - 1); - vpi_insert = &vpi[prog->NumInstructions - 1]; + inst = &prog->Instructions[pos]; - vpi_insert[i].Opcode = OPCODE_MOV; + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; + return 0; +} + +static int translateDPH(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; + return 0; +} - vpi_insert[i].Opcode = OPCODE_MOV; +static int translateFLR(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + struct prog_dst_register dst; + int tmp_idx; - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; + tmp_idx = prog->NumTemporaries++; - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; + _mesa_insert_instructions(prog, pos + 1, 1); - free(prog->Instructions); + inst = &prog->Instructions[pos]; + dst = inst->DstReg; - prog->Instructions = vpi; + inst->Opcode = OPCODE_FRC; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + ++inst; - prog->NumInstructions += i; - vpi = &prog->Instructions[prog->NumInstructions - 1]; + inst->Opcode = OPCODE_ADD; + inst->DstReg = dst; + inst->SrcReg[0] = (inst-1)->SrcReg[0]; + inst->SrcReg[1].File = PROGRAM_TEMPORARY; + inst->SrcReg[1].Index = tmp_idx; + inst->SrcReg[1].Negate = NEGATE_XYZW; - assert(vpi->Opcode == OPCODE_END); + return 1; } -static void pos_as_texcoord(struct r300_vertex_program *vp, - struct gl_program *prog) +static int translateSUB(struct gl_program *prog, int pos) { - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - /* should do something else if no temps left... */ - prog->NumTemporaries++; + struct prog_instruction *inst; - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT - && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - } - } - insert_wpos(vp, prog, tempregi); + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_ADD; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; } -static struct r300_vertex_program *build_program(struct r300_vertex_program_key - *wanted_key, struct gl_vertex_program - *mesa_vp, GLint wpos_idx) +static int translateSWZ(struct gl_program *prog, int pos) { - struct r300_vertex_program *vp; + prog->Instructions[pos].Opcode = OPCODE_MOV; - vp = _mesa_calloc(sizeof(*vp)); - _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - vp->wpos_idx = wpos_idx; + return 0; +} - if (mesa_vp->IsPositionInvariant) { - position_invariant(&mesa_vp->Base); - } +static int translateXPD(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + + *(inst+1) = *inst; + + inst->Opcode = OPCODE_MUL; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + ++inst; + + inst->Opcode = OPCODE_MAD; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + inst->SrcReg[2].File = PROGRAM_TEMPORARY; + inst->SrcReg[2].Index = tmp_idx; + + return 1; +} + +static void translateInsts(struct gl_program *prog) +{ + struct prog_instruction *inst; + int i; + + for (i = 0; i < prog->NumInstructions; ++i) { + inst = &prog->Instructions[i]; - if (wpos_idx > -1) { - pos_as_texcoord(vp, &mesa_vp->Base); + switch (inst->Opcode) { + case OPCODE_ABS: + i += translateABS(prog, i); + break; + case OPCODE_DP3: + i += translateDP3(prog, i); + break; + case OPCODE_DPH: + i += translateDPH(prog, i); + break; + case OPCODE_FLR: + i += translateFLR(prog, i); + break; + case OPCODE_SUB: + i += translateSUB(prog, i); + break; + case OPCODE_SWZ: + i += translateSWZ(prog, i); + break; + case OPCODE_XPD: + i += translateXPD(prog, i); + break; + default: + break; + } } +} - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(&mesa_vp->Base); - fflush(stdout); +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \ + OutputsAdded |= 1 << (vp_result); \ + count++; \ + } \ + } while (0) + +static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint OutputsAdded, FpReads; + int i, count; + + OutputsAdded = 0; + count = 0; + FpReads = r300->selected_fp->Base->InputsRead; + + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); + + for (i = 0; i < 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); } /* Some outputs may be artificially added, to match the inputs of the fragment program. * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by * vertex program are undefined, so just use MOV [vertex_result], CONST[0] */ - { - int i, count = 0; + if (count > 0) { + struct prog_instruction *inst; + + _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); + inst = &prog->Instructions[prog->NumInstructions - 1 - count]; + for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (vp->key.OutputsAdded & (1 << i)) { - ++count; + if (OutputsAdded & (1 << i)) { + inst->Opcode = OPCODE_MOV; + + inst->DstReg.File = PROGRAM_OUTPUT; + inst->DstReg.Index = i; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->DstReg.CondMask = COND_TR; + + inst->SrcReg[0].File = PROGRAM_CONSTANT; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++inst; } } - if (count > 0) { - struct prog_instruction *inst; + prog->OutputsWritten |= OutputsAdded; + } +} - _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count); - inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count]; +#undef ADD_OUTPUT - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (vp->key.OutputsAdded & (1 << i)) { - inst->Opcode = OPCODE_MOV; +static void nqssadceInit(struct nqssadce_state* s) +{ + r300ContextPtr r300 = R300_CONTEXT(s->Ctx); + GLuint fp_reads; - inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = i; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; + fp_reads = r300->selected_fp->Base->InputsRead; + { + if (fp_reads & FRAG_BIT_COL0) { + s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; + } - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = 0; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; + if (fp_reads & FRAG_BIT_COL1) { + s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; + } + } - ++inst; - } + { + int i; + for (i = 0; i < 8; ++i) { + if (fp_reads & FRAG_BIT_TEX(i)) { + s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; } } } - assert(mesa_vp->Base.NumInstructions); - vp->num_temporaries = mesa_vp->Base.NumTemporaries; - r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); - - return vp; + s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; + if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) + s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; } -static void add_outputs(struct r300_vertex_program_key *key, GLint vert) +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) { - if (key->OutputsWritten & (1 << vert)) - return; + (void) opcode; + (void) reg; - key->OutputsWritten |= 1 << vert; - key->OutputsAdded |= 1 << vert; + return GL_TRUE; } -void r300SelectVertexShader(r300ContextPtr r300) +static struct r300_vertex_program *build_program(GLcontext *ctx, + struct r300_vertex_program_key *wanted_key, + const struct gl_vertex_program *mesa_vp) { - GLcontext *ctx = ctx = r300->radeon.glCtx; - GLuint InputsRead; - struct r300_vertex_program_key wanted_key = { 0 }; - GLint i; - struct r300_vertex_program_cont *vpc; + r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program *vp; - GLint wpos_idx; + struct gl_program *prog; - vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; - wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - - wpos_idx = -1; - if (InputsRead & FRAG_BIT_WPOS) { - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(InputsRead & (FRAG_BIT_TEX0 << i))) - break; + vp = _mesa_calloc(sizeof(*vp)); + vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); + _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - if (i == ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tno free texcoord found\n"); - _mesa_exit(-1); - } + prog = &vp->Base->Base; + + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(prog); + fflush(stdout); + } - wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - wpos_idx = i; + if (vp->Base->IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, vp->Base); } - if (vpc->mesa_program.IsPositionInvariant) { - wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); - wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS); - } else { - add_outputs(&wanted_key, VERT_RESULT_HPOS); + if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { + pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0); } - if (InputsRead & FRAG_BIT_COL0) { - add_outputs(&wanted_key, VERT_RESULT_COL0); + if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { + fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0); } - if (InputsRead & FRAG_BIT_COL1) { - add_outputs(&wanted_key, VERT_RESULT_COL1); + addArtificialOutputs(ctx, prog); + + translateInsts(prog); + + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + _mesa_print_program(prog); + fflush(stdout); } - if (InputsRead & FRAG_BIT_FOGC) { - add_outputs(&wanted_key, VERT_RESULT_FOGC); + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(ctx, prog, &nqssadce); + + /* We need this step for reusing temporary registers */ + _mesa_optimize_program(ctx, prog); + + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + _mesa_print_program(prog); + fflush(stdout); + } } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - add_outputs(&wanted_key, VERT_RESULT_TEX0 + i); + assert(prog->NumInstructions); + { + struct prog_instruction *inst; + int max, i, tmp; + + inst = prog->Instructions; + max = -1; + while (inst->Opcode != OPCODE_END) { + tmp = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < tmp; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + if ((int) inst->SrcReg[i].Index > max) { + max = inst->SrcReg[i].Index; + } + } + } + + if (_mesa_num_inst_dst_regs(inst->Opcode)) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if ((int) inst->DstReg.Index > max) { + max = inst->DstReg.Index; + } + } + } + ++inst; } + + /* We actually want highest index of used temporary register, + * not the number of temporaries used. + * These values aren't always the same. + */ + vp->num_temporaries = max + 1; } - for (vp = vpc->progs; vp; vp = vp->next) + return vp; +} + +struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_program_key wanted_key = { 0 }; + struct r300_vertex_program_cont *vpc; + struct r300_vertex_program *vp; + + vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + wanted_key.FpReads = r300->selected_fp->Base->InputsRead; + wanted_key.FogAttr = r300->selected_fp->fog_attr; + wanted_key.WPosAttr = r300->selected_fp->wpos_attr; + + for (vp = vpc->progs; vp; vp = vp->next) { if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { - r300->selected_vp = vp; - return; + return r300->selected_vp = vp; } - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Initial vertex program:\n"); - _mesa_print_program(&vpc->mesa_program.Base); - fflush(stdout); } - vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); + vp = build_program(ctx, &wanted_key, &vpc->mesa_program); vp->next = vpc->progs; vpc->progs = vp; - r300->selected_vp = vp; + + return r300->selected_vp = vp; } #define bump_vpu_count(ptr, new_count) do { \ @@ -1544,25 +1701,22 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) struct r300_vertex_program *prog = rmesa->selected_vp; int inst_count = 0; int param_count = 0; - + /* Reset state, in case we don't use something */ ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; - + R300_STATECHANGE(rmesa, vpp); - param_count = r300VertexProgUpdateParams(ctx, - (struct r300_vertex_program_cont *) - ctx->VertexProgram._Current, - (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); inst_count = (prog->hw_code.length / 4) - 1; - r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead), - _mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries); + r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead), + _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries); R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index b552e3fb1b..2dab11c337 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -34,4 +34,8 @@ void r300SetupVertexProgram(r300ContextPtr rmesa); +struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx); + +void r300TranslateVertexShader(struct r300_vertex_program *vp); + #endif diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index 4a2e1cba40..840c9733b1 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -46,6 +46,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil switch(file) { case PROGRAM_TEMPORARY: return &s->Temps[index]; case PROGRAM_OUTPUT: return &s->Outputs[index]; + case PROGRAM_ADDRESS: return &s->Address; default: return 0; } } @@ -56,7 +57,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil * * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. */ -static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) { struct prog_src_register tmp = srcreg; int i; @@ -114,47 +115,19 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, deswz_source = sourced; } - struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + struct register_state *regstate; + + if (inst->SrcReg[src].RelAddr) + regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); + else + regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + if (regstate) regstate->Sourced |= deswz_source & 0xf; return inst; } - -static void rewrite_depth_out(struct prog_instruction *inst) -{ - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; - } else { - inst->DstReg.WriteMask = 0; - return; - } - - switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; - } -} - static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) { int nsrc = _mesa_num_inst_src_regs(inst->Opcode); @@ -189,11 +162,6 @@ static void process_instruction(struct nqssadce_state* s) return; if (inst->Opcode != OPCODE_KIL) { - if (s->Descr->RewriteDepthOut) { - if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH) - rewrite_depth_out(inst); - } - struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); if (!regstate) { _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", @@ -217,6 +185,7 @@ static void process_instruction(struct nqssadce_state* s) * might change the instruction stream under us, so we have * to be careful with the inst pointer. */ switch (inst->Opcode) { + case OPCODE_ARL: case OPCODE_DDX: case OPCODE_DDY: case OPCODE_FRC: @@ -227,6 +196,8 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_MAX: case OPCODE_MIN: case OPCODE_MUL: + case OPCODE_SGE: + case OPCODE_SLT: inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); break; @@ -258,12 +229,51 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_TXP: inst = track_used_srcreg(s, inst, 0, 0xf); break; + case OPCODE_DST: + inst = track_used_srcreg(s, inst, 0, 0x6); + inst = track_used_srcreg(s, inst, 1, 0xa); + break; + case OPCODE_EXP: + case OPCODE_LOG: + case OPCODE_POW: + inst = track_used_srcreg(s, inst, 0, 0x3); + break; + case OPCODE_LIT: + inst = track_used_srcreg(s, inst, 0, 0xb); + break; default: _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); return; } } +static void calculateInputsOutputs(struct gl_program *p) +{ + struct prog_instruction *inst; + GLuint InputsRead, OutputsWritten; + + inst = p->Instructions; + InputsRead = 0; + OutputsWritten = 0; + while (inst->Opcode != OPCODE_END) + { + int i, num_src_regs; + + num_src_regs = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < num_src_regs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_INPUT) + InputsRead |= 1 << inst->SrcReg[i].Index; + } + + if (inst->DstReg.File == PROGRAM_OUTPUT) + OutputsWritten |= 1 << inst->DstReg.Index; + + ++inst; + } + + p->InputsRead = InputsRead; + p->OutputsWritten = OutputsWritten; +} void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) { @@ -280,4 +290,6 @@ void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce s.IP--; process_instruction(&s); } + + calculateInputsOutputs(p); } diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h index a4f94abcb6..8626f21c25 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.h @@ -58,6 +58,7 @@ struct nqssadce_state { */ struct register_state Temps[MAX_PROGRAM_TEMPS]; struct register_state Outputs[VERT_RESULT_MAX]; + struct register_state Address; }; @@ -83,14 +84,10 @@ struct radeon_nqssadce_descr { */ void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - /** - * Rewrite instructions that write to DEPR.z to write to DEPR.w - * instead (rewriting is done *before* the WriteMask test). - */ - GLboolean RewriteDepthOut; void *Data; }; void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); #endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h index b411f69bc8..88474d43a2 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ b/src/mesa/drivers/dri/r300/radeon_program.h @@ -52,6 +52,38 @@ enum { #define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) #define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) +static inline GLuint get_swz(GLuint swz, GLuint idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) +{ + GLuint ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +static inline GLuint combine_swizzles(GLuint src, GLuint swz) +{ + GLuint ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; + + return ret; +} + + /** * Transformation context that is passed to local transformations. * diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c index 906d36e522..d6fb474cf2 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c @@ -870,7 +870,7 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, _mesa_bzero(&s, sizeof(s)); s.Ctx = ctx; - s.Program = program; + s.Program = _mesa_clone_program(ctx, program); s.Handler = handler; s.UserData = userdata; s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; @@ -904,6 +904,8 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, _mesa_free(s.ValuePool); _mesa_free(s.ReaderPool); + _mesa_reference_program(ctx, &s.Program, NULL); + return !s.Error; } |