From 8520b15018ca10e2bc47c1db4f7378df6d3c2e99 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 14 Feb 2009 17:06:06 +0100 Subject: r300: Redirect constant TEX coordinates R3xx/R5xx fragment program texture constants must come from a hardware register instead of the constant file, so we redirect if necessary during the native rewrite phase. The symptoms of this bug started appearing when the Mesa fixed function texenvprogram code started using STATE_CURRENT_ATTRIB constants for texture coordinates when the corresponding attributes were constant across all vertices. Signed-off-by: Nicolai Haehnle --- src/mesa/drivers/dri/r300/r300_fragprog.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 4ef7f2bd78..8d030c63fb 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -163,6 +163,19 @@ static GLboolean transform_TEX( } } + if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { + int tmpreg = radeonFindFreeTemporary(t); + tgt = radeonAppendInstructions(t->Program, 1); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tmpreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } + tgt = radeonAppendInstructions(t->Program, 1); _mesa_copy_instructions(tgt, &inst, 1); -- cgit v1.2.3 From 8d475822e6e19fa79719c856a2db5b6a205db1b9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 28 Feb 2009 11:49:46 -0700 Subject: mesa: rename, reorder FRAG_RESULT_x tokens s/FRAG_RESULT_DEPR/FRAG_RESULT_DEPTH/ s/FRAG_RESULT_COLR/FRAG_RESULT/COLOR/ Remove FRAG_RESULT_COLH (NV half-precision) output since we never used it. Next, we might merge the COLOR and DATA outputs (COLOR0, COLOR1, etc). --- src/mesa/drivers/dri/i915/i915_fragprog.c | 4 +-- src/mesa/drivers/dri/i965/brw_wm.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_fp.c | 8 ++--- src/mesa/drivers/dri/i965/brw_wm_state.c | 2 +- src/mesa/drivers/dri/r300/r300_fragprog.c | 4 +-- src/mesa/drivers/dri/r300/r500_fragprog.c | 4 +-- src/mesa/drivers/dri/r300/radeon_nqssadce.c | 2 +- src/mesa/drivers/dri/r300/radeon_program_pair.c | 4 +-- src/mesa/main/mtypes.h | 7 ++--- src/mesa/main/texenvprogram.c | 6 ++-- src/mesa/shader/arbprogparse.c | 4 +-- src/mesa/shader/nvfragparse.c | 40 ++++++++----------------- src/mesa/shader/nvfragparse.h | 5 ---- src/mesa/shader/prog_debug.c | 12 +++----- src/mesa/shader/program.c | 6 ++-- src/mesa/shader/programopt.c | 6 ++-- src/mesa/shader/slang/slang_codegen.c | 4 +-- src/mesa/shader/slang/slang_link.c | 2 +- src/mesa/state_tracker/st_atom_pixeltransfer.c | 4 +-- src/mesa/state_tracker/st_cb_drawpixels.c | 10 +++---- src/mesa/state_tracker/st_program.c | 10 +++---- src/mesa/swrast/s_context.c | 2 +- src/mesa/swrast/s_fragprog.c | 12 ++++---- 23 files changed, 68 insertions(+), 92 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index f091d600c3..52f09a4b1b 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -180,9 +180,9 @@ get_result_vector(struct i915_fragment_program *p, switch (inst->DstReg.File) { case PROGRAM_OUTPUT: switch (inst->DstReg.Index) { - case FRAG_RESULT_COLR: + case FRAG_RESULT_COLOR: return UREG(REG_TYPE_OC, 0); - case FRAG_RESULT_DEPR: + case FRAG_RESULT_DEPTH: p->depth_written = 1; return UREG(REG_TYPE_OD, 0); default: diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 10f38e02a4..06a6f3f0f4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -198,7 +198,7 @@ static void brw_wm_populate_key( struct brw_context *brw, ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<program.Base.OutputsWritten & (1<Aux = (i<<1); if (c->fp_fragcolor_emitted) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), 0, outcolor, payload_r0_depth, outdepth); inst->Aux = (i<<1); @@ -892,7 +892,7 @@ static void emit_fb_write( struct brw_wm_compile *c ) if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), 0, outcolor, payload_r0_depth, outdepth); @@ -928,7 +928,7 @@ static void validate_dst_regs( struct brw_wm_compile *c, { if (inst->DstReg.File == PROGRAM_OUTPUT) { GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLR) + if (idx == FRAG_RESULT_COLOR) c->fp_fragcolor_emitted = 1; } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 3c3b3473d6..f7ea756fb0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -103,7 +103,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; + (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 8d030c63fb..32182bb667 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -356,8 +356,8 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) static void nqssadce_init(struct nqssadce_state* s) { - s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W; + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 926ddd5964..07a2a7b17c 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -298,8 +298,8 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) static void nqssadce_init(struct nqssadce_state* s) { - s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W; + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; } static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index 97ce016c99..a083c3d243 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -191,7 +191,7 @@ static void process_instruction(struct nqssadce_state* s) if (inst->Opcode != OPCODE_KIL) { if (s->Descr->RewriteDepthOut) { - if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR) + if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH) rewrite_depth_out(inst); } diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c index 58bc0d5843..365e7c1722 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c @@ -778,10 +778,10 @@ static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruc struct prog_instruction *inst = s->Program->Instructions + ip; if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == FRAG_RESULT_COLR) { + if (inst->DstReg.Index == FRAG_RESULT_COLOR) { pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == FRAG_RESULT_DEPR) { + } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); } } else { diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 22add9d975..f17b9e1e71 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -268,10 +268,9 @@ typedef enum */ typedef enum { - FRAG_RESULT_COLR = 0, - FRAG_RESULT_COLH = 1, - FRAG_RESULT_DEPR = 2, - FRAG_RESULT_DATA0 = 3, + FRAG_RESULT_DEPTH = 0, + FRAG_RESULT_COLOR = 1, + FRAG_RESULT_DATA0 = 2, FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) } gl_frag_result; diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index d2a9e35dd5..51c13a563d 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -1098,7 +1098,7 @@ emit_texenv(struct texenv_fragment_program *p, GLuint unit) rgb_shift) dest = get_temp( p ); else - dest = make_ureg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + dest = make_ureg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); /* Emit the RGB and A combine ops */ @@ -1278,7 +1278,7 @@ create_new_program(GLcontext *ctx, struct state_key *key, p.program->Base.Parameters = _mesa_new_parameter_list(); p.program->Base.InputsRead = 0; - p.program->Base.OutputsWritten = 1 << FRAG_RESULT_COLR; + p.program->Base.OutputsWritten = 1 << FRAG_RESULT_COLOR; for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) p.src_texture[unit] = undef; @@ -1313,7 +1313,7 @@ create_new_program(GLcontext *ctx, struct state_key *key, } cf = get_source( &p, SRC_PREVIOUS, 0 ); - out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_COLR ); + out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_COLOR ); if (key->separate_specular) { /* Emit specular add. diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index 62edb7f595..ccc0318a53 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -1688,7 +1688,7 @@ parse_result_binding(GLcontext *ctx, const GLubyte **inst, */ parse_output_color_num(ctx, inst, Program, &out_color); ASSERT(out_color < MAX_DRAW_BUFFERS); - *outputReg = FRAG_RESULT_COLR; + *outputReg = FRAG_RESULT_COLOR; } else { /* for vtx programs, this is VERTEX_RESULT_POSITION */ @@ -1699,7 +1699,7 @@ parse_result_binding(GLcontext *ctx, const GLubyte **inst, case FRAGMENT_RESULT_DEPTH: if (Program->Base.Target == GL_FRAGMENT_PROGRAM_ARB) { /* for frag programs, this is FRAGMENT_RESULT_DEPTH */ - *outputReg = FRAG_RESULT_DEPR; + *outputReg = FRAG_RESULT_DEPTH; } else { /* for vtx programs, this is VERTEX_RESULT_COLOR */ diff --git a/src/mesa/shader/nvfragparse.c b/src/mesa/shader/nvfragparse.c index 20e4781372..37418ffb6e 100644 --- a/src/mesa/shader/nvfragparse.c +++ b/src/mesa/shader/nvfragparse.c @@ -384,18 +384,12 @@ static const char *InputRegisters[MAX_NV_FRAGMENT_PROGRAM_INPUTS + 1] = { "TEX0", "TEX1", "TEX2", "TEX3", "TEX4", "TEX5", "TEX6", "TEX7", NULL }; + static const char *OutputRegisters[MAX_NV_FRAGMENT_PROGRAM_OUTPUTS + 1] = { - "COLR", "COLH", - /* These are only allows for register combiners */ - /* - "TEX0", "TEX1", "TEX2", "TEX3", - */ - "DEPR", NULL + "DEPR", "COLR", "DATA0", NULL }; - - /**********************************************************************/ /** @@ -828,7 +822,6 @@ static GLboolean Parse_OutputReg(struct parse_state *parseState, GLint *outputRegNum) { GLubyte token[100]; - GLint j; /* Match "o[" */ if (!Parse_String(parseState, "o[")) @@ -839,19 +832,19 @@ Parse_OutputReg(struct parse_state *parseState, GLint *outputRegNum) RETURN_ERROR; /* try to match an output register name */ - for (j = 0; OutputRegisters[j]; j++) { - if (_mesa_strcmp((const char *) token, OutputRegisters[j]) == 0) { - static GLuint bothColors = (1 << FRAG_RESULT_COLR) | (1 << FRAG_RESULT_COLH); - *outputRegNum = j; - parseState->outputsWritten |= (1 << j); - if ((parseState->outputsWritten & bothColors) == bothColors) { - RETURN_ERROR1("Illegal to write to both o[COLR] and o[COLH]"); - } - break; - } + if (_mesa_strcmp((char *) token, "COLR") == 0 || + _mesa_strcmp((char *) token, "COLH") == 0) { + /* note that we don't distinguish between COLR and COLH */ + *outputRegNum = FRAG_RESULT_COLOR; + parseState->outputsWritten |= (1 << FRAG_RESULT_COLOR); } - if (!OutputRegisters[j]) + else if (_mesa_strcmp((char *) token, "DEPR") == 0) { + *outputRegNum = FRAG_RESULT_DEPTH; + parseState->outputsWritten |= (1 << FRAG_RESULT_DEPTH); + } + else { RETURN_ERROR1("Invalid output register name"); + } /* Match ']' */ if (!Parse_String(parseState, "]")) @@ -1826,10 +1819,3 @@ _mesa_nv_fragment_input_register_name(GLuint i) return InputRegisters[i]; } - -const char * -_mesa_nv_fragment_output_register_name(GLuint i) -{ - ASSERT(i < MAX_NV_FRAGMENT_PROGRAM_OUTPUTS); - return OutputRegisters[i]; -} diff --git a/src/mesa/shader/nvfragparse.h b/src/mesa/shader/nvfragparse.h index de45cf543d..ac97921080 100644 --- a/src/mesa/shader/nvfragparse.h +++ b/src/mesa/shader/nvfragparse.h @@ -44,9 +44,4 @@ _mesa_print_nv_fragment_program(const struct gl_fragment_program *program); extern const char * _mesa_nv_fragment_input_register_name(GLuint i); - -extern const char * -_mesa_nv_fragment_output_register_name(GLuint i); - - #endif diff --git a/src/mesa/shader/prog_debug.c b/src/mesa/shader/prog_debug.c index 7bcb2ef734..35ce37d5ce 100644 --- a/src/mesa/shader/prog_debug.c +++ b/src/mesa/shader/prog_debug.c @@ -222,20 +222,16 @@ _mesa_GetProgramRegisterfvMESA(GLenum target, "glGetProgramRegisterfvMESA(registerName)"); return; } - else if (_mesa_strcmp(reg, "o[COLR]") == 0) { + else if (_mesa_strcmp(reg, "o[COLR]") == 0 || + _mesa_strcmp(reg, "o[COLH]") == 0) { /* Fragment output color */ ctx->Driver.GetProgramRegister(ctx, PROGRAM_OUTPUT, - FRAG_RESULT_COLR, v); - } - else if (_mesa_strcmp(reg, "o[COLH]") == 0) { - /* Fragment output color */ - ctx->Driver.GetProgramRegister(ctx, PROGRAM_OUTPUT, - FRAG_RESULT_COLH, v); + FRAG_RESULT_COLOR, v); } else if (_mesa_strcmp(reg, "o[DEPR]") == 0) { /* Fragment output depth */ ctx->Driver.GetProgramRegister(ctx, PROGRAM_OUTPUT, - FRAG_RESULT_DEPR, v); + FRAG_RESULT_DEPTH, v); } else { /* try user-defined identifiers */ diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c index 00655f0288..2e5632710e 100644 --- a/src/mesa/shader/program.c +++ b/src/mesa/shader/program.c @@ -738,7 +738,7 @@ _mesa_combine_programs(GLcontext *ctx, /* Connect color outputs of fprogA to color inputs of fprogB, via a * new temporary register. */ - if ((progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) && + if ((progA->OutputsWritten & (1 << FRAG_RESULT_COLOR)) && (progB_inputsRead & FRAG_BIT_COL0)) { GLint tempReg = _mesa_find_free_register(newProg, PROGRAM_TEMPORARY); if (tempReg < 0) { @@ -748,7 +748,7 @@ _mesa_combine_programs(GLcontext *ctx, } /* replace writes to result.color[0] with tempReg */ replace_registers(newInst, lenA, - PROGRAM_OUTPUT, FRAG_RESULT_COLR, + PROGRAM_OUTPUT, FRAG_RESULT_COLOR, PROGRAM_TEMPORARY, tempReg); /* replace reads from the input color with tempReg */ replace_registers(newInst + lenA, lenB, @@ -758,7 +758,7 @@ _mesa_combine_programs(GLcontext *ctx, /* compute combined program's InputsRead */ inputsB = progB_inputsRead; - if (progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) { + if (progA->OutputsWritten & (1 << FRAG_RESULT_COLOR)) { inputsB &= ~(1 << FRAG_ATTRIB_COL0); } newProg->InputsRead = progA->InputsRead | inputsB; diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c index 56f1eb832e..05d9bdf7ec 100644 --- a/src/mesa/shader/programopt.c +++ b/src/mesa/shader/programopt.c @@ -171,7 +171,7 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) if (inst->Opcode == OPCODE_END) break; if (inst->DstReg.File == PROGRAM_OUTPUT && - inst->DstReg.Index == FRAG_RESULT_COLR) { + inst->DstReg.Index == FRAG_RESULT_COLOR) { /* change the instruction to write to colorTemp w/ clamping */ inst->DstReg.File = PROGRAM_TEMPORARY; inst->DstReg.Index = colorTemp; @@ -249,7 +249,7 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) /* LRP result.color.xyz, fogFactorTemp.xxxx, colorTemp, fogColorRef; */ inst->Opcode = OPCODE_LRP; inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = FRAG_RESULT_COLR; + inst->DstReg.Index = FRAG_RESULT_COLOR; inst->DstReg.WriteMask = WRITEMASK_XYZ; inst->SrcReg[0].File = PROGRAM_TEMPORARY; inst->SrcReg[0].Index = fogFactorTemp; @@ -264,7 +264,7 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) /* MOV result.color.w, colorTemp.x; # copy alpha */ inst->Opcode = OPCODE_MOV; inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = FRAG_RESULT_COLR; + inst->DstReg.Index = FRAG_RESULT_COLOR; inst->DstReg.WriteMask = WRITEMASK_W; inst->SrcReg[0].File = PROGRAM_TEMPORARY; inst->SrcReg[0].Index = colorTemp; diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 5e2ce0ce3a..ee24e2e138 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -450,8 +450,8 @@ _slang_output_index(const char *name, GLenum target) { NULL, 0 } }; static const struct output_info fragOutputs[] = { - { "gl_FragColor", FRAG_RESULT_COLR }, - { "gl_FragDepth", FRAG_RESULT_DEPR }, + { "gl_FragColor", FRAG_RESULT_COLOR }, + { "gl_FragDepth", FRAG_RESULT_DEPTH }, { "gl_FragData", FRAG_RESULT_DATA0 }, { NULL, 0 } }; diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index bcabe71bfa..f98434892b 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -686,7 +686,7 @@ _slang_link(GLcontext *ctx, /* check that gl_FragColor and gl_FragData are not both written to */ if (shProg->FragmentProgram) { GLbitfield outputsWritten = shProg->FragmentProgram->Base.OutputsWritten; - if ((outputsWritten & ((1 << FRAG_RESULT_COLR))) && + if ((outputsWritten & ((1 << FRAG_RESULT_COLOR))) && (outputsWritten >= (1 << FRAG_RESULT_DATA0))) { link_error(shProg, "Fragment program cannot write both gl_FragColor" " and gl_FragData[].\n"); diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index ce872458e3..05b69c9d00 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -212,7 +212,7 @@ get_pixel_transfer_program(GLcontext *ctx, const struct state_key *key) inst[ic].TexSrcTarget = TEXTURE_2D_INDEX; ic++; fp->Base.InputsRead = (1 << FRAG_ATTRIB_TEX0); - fp->Base.OutputsWritten = (1 << FRAG_RESULT_COLR); + fp->Base.OutputsWritten = (1 << FRAG_RESULT_COLOR); fp->Base.SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ if (key->scaleAndBias) { @@ -400,7 +400,7 @@ get_pixel_transfer_program(GLcontext *ctx, const struct state_key *key) { struct prog_instruction *last = &inst[ic - 1]; last->DstReg.File = PROGRAM_OUTPUT; - last->DstReg.Index = FRAG_RESULT_COLR; + last->DstReg.Index = FRAG_RESULT_COLOR; } /* END; */ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index c73d0080ca..cc7a9e7890 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -75,7 +75,7 @@ is_passthrough_program(const struct gl_fragment_program *prog) if (inst[0].Opcode == OPCODE_MOV && inst[1].Opcode == OPCODE_END && inst[0].DstReg.File == PROGRAM_OUTPUT && - inst[0].DstReg.Index == FRAG_RESULT_COLR && + inst[0].DstReg.Index == FRAG_RESULT_COLOR && inst[0].DstReg.WriteMask == WRITEMASK_XYZW && inst[0].SrcReg[0].File == PROGRAM_INPUT && inst[0].SrcReg[0].Index == FRAG_ATTRIB_COL0 && @@ -158,7 +158,7 @@ combined_drawpix_fragment_program(GLcontext *ctx) /** * Create fragment shader that does a TEX() instruction to get a Z - * value, then writes to FRAG_RESULT_DEPR. + * value, then writes to FRAG_RESULT_DEPTH. * Pass fragment color through as-is. */ static struct st_fragment_program * @@ -191,7 +191,7 @@ make_fragment_shader_z(struct st_context *st) /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */ p->Instructions[ic].Opcode = OPCODE_TEX; p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; - p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPR; + p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH; p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z; p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; @@ -202,7 +202,7 @@ make_fragment_shader_z(struct st_context *st) /* MOV result.color, fragment.color */ p->Instructions[ic].Opcode = OPCODE_MOV; p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; - p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLR; + p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLOR; p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_COL0; ic++; @@ -213,7 +213,7 @@ make_fragment_shader_z(struct st_context *st) assert(ic == p->NumInstructions); p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0; - p->OutputsWritten = (1 << FRAG_RESULT_COLR) | (1 << FRAG_RESULT_DEPR); + p->OutputsWritten = (1 << FRAG_RESULT_COLOR) | (1 << FRAG_RESULT_DEPTH); p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ st->drawpix.z_shader = (struct st_fragment_program *) p; diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index d2535b6a2f..f825204915 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -466,23 +466,23 @@ st_translate_fragment_program(struct st_context *st, GLbitfield outputsWritten = stfp->Base.Base.OutputsWritten; /* if z is written, emit that first */ - if (outputsWritten & (1 << FRAG_RESULT_DEPR)) { + if (outputsWritten & (1 << FRAG_RESULT_DEPTH)) { fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_DEPR] = fs_num_outputs; + outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs; fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_DEPR); + outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); } /* handle remaning outputs (color) */ for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { if (outputsWritten & (1 << attr)) { switch (attr) { - case FRAG_RESULT_DEPR: + case FRAG_RESULT_DEPTH: /* handled above */ assert(0); break; - case FRAG_RESULT_COLR: + case FRAG_RESULT_COLOR: fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; fs_output_semantic_index[fs_num_outputs] = numColors; outputMapping[attr] = fs_num_outputs; diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c index 297940adbd..719e6b8296 100644 --- a/src/mesa/swrast/s_context.c +++ b/src/mesa/swrast/s_context.c @@ -206,7 +206,7 @@ _swrast_update_deferred_texture(GLcontext *ctx) else { const struct gl_fragment_program *fprog = ctx->FragmentProgram._Current; - if (fprog && (fprog->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR))) { + if (fprog && (fprog->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH))) { /* Z comes from fragment program/shader */ swrast->_DeferredTexture = GL_FALSE; } diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index c6601f5593..3735f041d6 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -202,9 +202,9 @@ run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end) if (_mesa_execute_program(ctx, &program->Base, machine)) { /* Store result color */ - if (outputsWritten & (1 << FRAG_RESULT_COLR)) { + if (outputsWritten & (1 << FRAG_RESULT_COLOR)) { COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], - machine->Outputs[FRAG_RESULT_COLR]); + machine->Outputs[FRAG_RESULT_COLOR]); } else { /* Multiple drawbuffers / render targets @@ -221,8 +221,8 @@ run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end) } /* Store result depth/z */ - if (outputsWritten & (1 << FRAG_RESULT_DEPR)) { - const GLfloat depth = machine->Outputs[FRAG_RESULT_DEPR][2]; + if (outputsWritten & (1 << FRAG_RESULT_DEPTH)) { + const GLfloat depth = machine->Outputs[FRAG_RESULT_DEPTH][2]; if (depth <= 0.0) span->array->z[i] = 0; else if (depth >= 1.0) @@ -259,12 +259,12 @@ _swrast_exec_fragment_program( GLcontext *ctx, SWspan *span ) run_program(ctx, span, 0, span->end); - if (program->Base.OutputsWritten & (1 << FRAG_RESULT_COLR)) { + if (program->Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) { span->interpMask &= ~SPAN_RGBA; span->arrayMask |= SPAN_RGBA; } - if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) { + if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) { span->interpMask &= ~SPAN_Z; span->arrayMask |= SPAN_Z; } -- cgit v1.2.3 From 7db7ff878d3e5a6b345228e6eaee4797bb68b360 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 14 Apr 2009 22:14:30 -0600 Subject: mesa: merge the prog_src_register::NegateBase and NegateAbs fields There's really no need for two negation fields. This came from the GL_NV_fragment_program extension. The new, unified Negate bitfield applies after the absolute value step. --- src/mesa/drivers/dri/i915/i915_fragprog.c | 10 ++--- src/mesa/drivers/dri/i965/brw_vs_constval.c | 2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c | 8 ++-- src/mesa/drivers/dri/i965/brw_wm_fp.c | 13 +++--- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 6 +-- src/mesa/drivers/dri/i965/brw_wm_pass0.c | 2 +- src/mesa/drivers/dri/r200/r200_vertprog.c | 34 ++++++++-------- src/mesa/drivers/dri/r300/r300_fragprog.c | 4 +- src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c | 17 ++++---- src/mesa/drivers/dri/r300/r300_vertprog.c | 49 +++++++++-------------- src/mesa/drivers/dri/r300/r500_fragprog.c | 19 +++++---- src/mesa/drivers/dri/r300/radeon_nqssadce.c | 7 ++-- src/mesa/drivers/dri/r300/radeon_program_alu.c | 20 +++++---- src/mesa/drivers/dri/r300/radeon_program_pair.c | 11 +++-- src/mesa/main/ffvertex_prog.c | 3 +- src/mesa/main/texenvprogram.c | 3 +- src/mesa/shader/arbprogparse.c | 8 ++-- src/mesa/shader/nvfragparse.c | 33 ++++++++------- src/mesa/shader/nvvertparse.c | 10 ++--- src/mesa/shader/prog_execute.c | 27 ++++--------- src/mesa/shader/prog_instruction.h | 32 +++------------ src/mesa/shader/prog_print.c | 20 ++++----- src/mesa/shader/programopt.c | 2 +- src/mesa/shader/slang/slang_emit.c | 2 +- src/mesa/state_tracker/st_cb_bitmap.c | 2 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 20 ++++----- 26 files changed, 156 insertions(+), 208 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 52f09a4b1b..a5158de945 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -162,12 +162,12 @@ src_vector(struct i915_fragment_program *p, GET_SWZ(source->Swizzle, 1), GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3)); - if (source->NegateBase) + if (source->Negate) src = negate(src, - GET_BIT(source->NegateBase, 0), - GET_BIT(source->NegateBase, 1), - GET_BIT(source->NegateBase, 2), - GET_BIT(source->NegateBase, 3)); + GET_BIT(source->Negate, 0), + GET_BIT(source->Negate, 1), + GET_BIT(source->Negate, 2), + GET_BIT(source->Negate, 3)); return src; } diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index d29eb17f8c..2637344b48 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -96,7 +96,7 @@ static GLubyte get_active( struct tracker *t, struct prog_src_register src ) { GLuint i; - GLubyte active = src.NegateBase; /* NOTE! */ + GLubyte active = src.Negate; /* NOTE! */ if (src.RelAddr) return 0xf; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 2ee63129bc..42f6a99142 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -899,7 +899,7 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, /* Note this is ok for non-swizzle instructions: */ - reg.negate = src->NegateBase ? 1 : 0; + reg.negate = src->Negate ? 1 : 0; return reg; } @@ -945,7 +945,7 @@ static void emit_swz( struct brw_vs_compile *c, GLuint ones_mask = 0; GLuint src_mask = 0; GLubyte src_swz[4]; - GLboolean need_tmp = (src.NegateBase && + GLboolean need_tmp = (src.Negate && dst.file != BRW_GENERAL_REGISTER_FILE); struct brw_reg tmp = dst; GLuint i; @@ -997,8 +997,8 @@ static void emit_swz( struct brw_vs_compile *c, if (ones_mask) brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - if (src.NegateBase) - brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp)); + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); if (need_tmp) { brw_MOV(p, dst, tmp); diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index a7f5f1b9a2..1798d842c7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -80,9 +80,8 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx) reg.Index = idx; reg.Swizzle = SWIZZLE_NOOP; reg.RelAddr = 0; - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; reg.Abs = 0; - reg.NegateAbs = 0; return reg; } @@ -569,7 +568,7 @@ static void precalc_dst( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase &= ~NEGATE_X; + swz->SrcReg[0].Negate &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -604,7 +603,7 @@ static void precalc_lit( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase = 0; + swz->SrcReg[0].Negate = NEGATE_NONE; } if (dst.WriteMask & WRITEMASK_YZ) { @@ -651,7 +650,7 @@ static void precalc_tex( struct brw_wm_compile *c, src0, src_undef(), src_undef()); - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ @@ -1050,14 +1049,14 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_ABS: out = emit_insn(c, inst); out->Opcode = OPCODE_MOV; - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; case OPCODE_SUB: out = emit_insn(c, inst); out->Opcode = OPCODE_ADD; - out->SrcReg[1].NegateBase ^= 0xf; + out->SrcReg[1].Negate ^= NEGATE_XYZW; break; case OPCODE_SCS: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 49fea2e41a..385efd2dd3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -340,7 +340,7 @@ get_src_reg_const(struct brw_wm_compile *c, const_reg = stride(const_reg, 0, 1, 0); const_reg.subnr = component * 4; - if (src->NegateBase) + if (src->Negate & (1 << component)) const_reg = negate(const_reg); if (src->Abs) const_reg = brw_abs(const_reg); @@ -377,7 +377,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, else { /* other type of source register */ return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + src->Negate, src->Abs); } } @@ -402,7 +402,7 @@ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, const GLfloat *param = c->fp->program.Base.Parameters->ParameterValues[src->Index]; GLfloat value = param[component]; - if (src->NegateBase) + if (src->Negate & (1 << channel)) value = -value; if (src->Abs) value = FABSF(value); diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 2debd0678a..92142764f5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -322,7 +322,7 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, newref->value->lastuse = newref; } - if (src.NegateBase & (1<hw_reg.negate ^= 1; if (src.Abs) { diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index a2561df579..4ce93b5145 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -290,7 +290,7 @@ static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_regis t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); + src->Negate) | (src->RelAddr << 4); } static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src) @@ -302,7 +302,7 @@ static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_sr t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); + src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); } static unsigned long t_opcode(enum prog_opcode opcode) @@ -700,7 +700,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_0; } @@ -712,12 +712,12 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte t_swizzle(GET_SWZ(src[0].Swizzle, 0)), SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_ZERO, SWIZZLE_ZERO, t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; @@ -766,11 +766,11 @@ if ((o_inst - vp->instr) == 31) { o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); } else { o_inst->src1 = t_src(vp, &src[1]); @@ -792,7 +792,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 0)), @@ -800,7 +800,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -815,7 +815,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), VSF_IN_COMPONENT_ONE, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = t_src(vp, &src[1]); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -831,7 +831,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 2)), t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -846,7 +846,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -874,7 +874,7 @@ else { VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, /* Not 100% sure about this */ - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); + (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); o_inst->src2 = UNUSED_SRC_0; u_temp_i--; @@ -899,7 +899,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z @@ -907,7 +907,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; @@ -922,7 +922,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z @@ -930,7 +930,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1, VSF_IN_COMPONENT_X, diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 32182bb667..873cde4414 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -214,9 +214,9 @@ static GLboolean transform_TEX( * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; else - tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; tgt[2].Opcode = OPCODE_CMP; tgt[2].DstReg = orig_inst->DstReg; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c index a86d2bd471..191853ac1f 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c @@ -92,7 +92,7 @@ static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) { if (reg.Abs) - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; if (opcode == OPCODE_KIL || opcode == OPCODE_TEX || @@ -100,7 +100,8 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) opcode == OPCODE_TXP) { int j; - if (reg.Abs || reg.NegateBase != (15*reg.NegateAbs)) + if (reg.Abs || (reg.Negate != NEGATE_XYZW && + reg.Negate != NEGATE_NONE)) return GL_FALSE; for(j = 0; j < 4; ++j) { @@ -121,7 +122,7 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) relevant |= 1 << j; - if ((reg.NegateBase & relevant) && (reg.NegateBase & relevant) != relevant) + if ((reg.Negate & relevant) && (reg.Negate & relevant) != relevant) return GL_FALSE; if (!lookup_native_swizzle(reg.Swizzle)) @@ -137,7 +138,7 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { if (src.Abs) - src.NegateBase = 0; + src.Negate = NEGATE_NONE; while(dst.WriteMask) { const struct swizzle_data *best_swizzle = 0; @@ -170,11 +171,11 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, } } - if ((src.NegateBase & best_matchmask) != 0) { - best_matchmask &= src.NegateBase; - rgbnegate = !src.NegateAbs; + if ((src.Negate & best_matchmask) != 0) { + best_matchmask &= src.Negate; + rgbnegate = !src.Negate; } else { - rgbnegate = src.NegateAbs; + rgbnegate = src.Negate; } struct prog_instruction *inst; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 50806575ce..146daa367c 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -245,7 +245,7 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) { - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the NEGATE_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -254,13 +254,13 @@ static unsigned long t_src(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); + src->Negate) | (src->RelAddr << 4); } static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) { - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the NEGATE_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -269,8 +269,7 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src-> - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); } @@ -307,7 +306,7 @@ static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = 0; @@ -369,8 +368,7 @@ static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), @@ -378,8 +376,7 @@ static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 1)), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = __CONST(1, SWIZZLE_ZERO); @@ -422,8 +419,7 @@ static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 2)), PVS_SRC_SELECT_FORCE_1, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = t_src(vp, &src[1]); inst[3] = __CONST(1, SWIZZLE_ZERO); @@ -519,7 +515,7 @@ static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, /* Not 100% sure about this */ (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE /*VSF_FLAG_ALL */ ); inst[3] = __CONST(0, SWIZZLE_ZERO); (*u_temp_i)--; @@ -564,8 +560,7 @@ static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = __CONST(0, SWIZZLE_ZERO); inst[3] = __CONST(0, SWIZZLE_ZERO); @@ -592,24 +587,21 @@ static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); return inst; @@ -837,7 +829,7 @@ static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = 0; #else @@ -857,7 +849,7 @@ static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); #endif @@ -905,16 +897,14 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = __CONST(1, SWIZZLE_ZERO); inst += 4; @@ -931,15 +921,14 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 07a2a7b17c..292573de89 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -156,9 +156,9 @@ static GLboolean transform_TEX( * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; else - tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; tgt[2].Opcode = OPCODE_CMP; tgt[2].DstReg = orig_inst->DstReg; @@ -314,8 +314,8 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) if (reg.Abs) return GL_FALSE; - if (reg.NegateAbs) - reg.NegateBase ^= 15; + if (reg.Negate) + reg.Negate ^= NEGATE_XYZW; if (opcode == OPCODE_KIL) { if (reg.Swizzle != SWIZZLE_NOOP) @@ -324,7 +324,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) for(i = 0; i < 4; ++i) { GLuint swz = GET_SWZ(reg.Swizzle, i); if (swz == SWIZZLE_NIL) { - reg.NegateBase &= ~(1 << i); + reg.Negate &= ~(1 << i); continue; } if (swz >= 4) @@ -332,15 +332,14 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) } } - if (reg.NegateBase) + if (reg.Negate) return GL_FALSE; return GL_TRUE; } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs - && !reg.NegateBase && !reg.NegateAbs) + if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) return GL_TRUE; return GL_FALSE; @@ -355,7 +354,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) relevant |= 1 << i; } - if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant)) + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return GL_FALSE; return GL_TRUE; @@ -379,7 +378,7 @@ static void nqssadce_build_swizzle(struct nqssadce_state *s, GLuint swz = GET_SWZ(src.Swizzle, i); if (swz == SWIZZLE_NIL) continue; - negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index a083c3d243..4a2e1cba40 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -61,12 +61,12 @@ static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_reg struct prog_src_register tmp = srcreg; int i; tmp.Swizzle = 0; - tmp.NegateBase = 0; + tmp.Negate = NEGATE_NONE; for(i = 0; i < 4; ++i) { GLuint swz = GET_SWZ(swizzle, i); if (swz < 4) { tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i; + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; } else { tmp.Swizzle |= swz << (i*3); } @@ -103,9 +103,8 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, inst->SrcReg[src].File = PROGRAM_TEMPORARY; inst->SrcReg[src].Index = dstreg.Index; inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].NegateBase = 0; + inst->SrcReg[src].Negate = NEGATE_NONE; inst->SrcReg[src].Abs = 0; - inst->SrcReg[src].NegateAbs = 0; for(i = 0; i < 4; ++i) { if (GET_BIT(sourced, i)) inst->SrcReg[src].Swizzle |= i << (3*i); diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c index 1ef71e74dc..ebc5c913b2 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -89,8 +89,9 @@ static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int sw static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate) { - SrcReg->NegateBase &= ~(1 << coordinate); - SrcReg->NegateBase |= (negate << coordinate); + /* XXX note sure about this negation logic here */ + SrcReg->Negate &= ~(1 << coordinate); + SrcReg->Negate |= (negate << coordinate); } static struct prog_dst_register dstreg(int file, int index) @@ -156,15 +157,14 @@ static struct prog_src_register absolute(struct prog_src_register reg) { struct prog_src_register newreg = reg; newreg.Abs = 1; - newreg.NegateBase = 0; - newreg.NegateAbs = 0; + newreg.Negate = NEGATE_NONE; return newreg; } static struct prog_src_register negate(struct prog_src_register reg) { struct prog_src_register newreg = reg; - newreg.NegateAbs = !newreg.NegateAbs; + newreg.Negate = newreg.Negate ^ NEGATE_XYZW; return newreg; } @@ -189,8 +189,7 @@ static void transform_ABS(struct radeon_transform_context* t, { struct prog_src_register src = inst->SrcReg[0]; src.Abs = 1; - src.NegateBase = 0; - src.NegateAbs = 0; + src.Negate = NEGATE_NONE; emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); } @@ -198,14 +197,13 @@ static void transform_DPH(struct radeon_transform_context* t, struct prog_instruction* inst) { struct prog_src_register src0 = inst->SrcReg[0]; - if (src0.NegateAbs) { + if (src0.Negate) { if (src0.Abs) { int tempreg = radeonFindFreeTemporary(t); emit1(t->Program, OPCODE_MOV, 0, dstreg(PROGRAM_TEMPORARY, tempreg), src0); src0 = srcreg(src0.File, src0.Index); } else { - src0.NegateAbs = 0; - src0.NegateBase ^= NEGATE_XYZW; + src0.Negate ^= NEGATE_XYZW; } } set_swizzle(&src0, 3, SWIZZLE_ONE); @@ -649,7 +647,7 @@ GLboolean radeonTransformDeriv(struct radeon_transform_context* t, B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); - B.NegateBase = NEGATE_XYZW; + B.Negate = NEGATE_XYZW; emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], B); diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c index f398404f9f..ecc82ff8a8 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c @@ -255,8 +255,7 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) inst->SrcReg[2] = inst->SrcReg[1]; inst->SrcReg[1].File = PROGRAM_BUILTIN; inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].NegateBase = 0; - inst->SrcReg[1].NegateAbs = 0; + inst->SrcReg[1].Negate = NEGATE_NONE; inst->Opcode = OPCODE_MAD; break; case OPCODE_CMP: @@ -730,7 +729,7 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ srcrgb = GL_TRUE; else if (swz < 4) srcalpha = GL_TRUE; - if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j)) + if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].Negate, j)) negatebase = 1; } source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); @@ -739,12 +738,12 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ pair->RGB.Arg[i].Source = source; pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].Negate; } if (pairinst->NeedAlpha) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; - GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3); + GLuint negatebase = GET_BIT(inst->SrcReg[i].Negate, pairinst->IsTranscendent ? 0 : 3); GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); if (swz < 3) srcrgb = GL_TRUE; @@ -756,7 +755,7 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ pair->Alpha.Arg[i].Source = source; pair->Alpha.Arg[i].Swizzle = swz; pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].Negate; } } diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index 03f42704a7..1ce5685af4 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -570,9 +570,8 @@ static void emit_arg( struct prog_src_register *src, src->File = reg.file; src->Index = reg.idx; src->Swizzle = reg.swz; - src->NegateBase = reg.negate ? NEGATE_XYZW : 0; + src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; src->Abs = 0; - src->NegateAbs = 0; src->RelAddr = 0; /* Check that bitfield sizes aren't exceeded */ ASSERT(src->Index == reg.idx); diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index 4a124bf27e..a70d069bd9 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -663,9 +663,8 @@ static void emit_arg( struct prog_src_register *reg, reg->File = ureg.file; reg->Index = ureg.idx; reg->Swizzle = ureg.swz; - reg->NegateBase = ureg.negatebase ? 0xf : 0x0; + reg->Negate = ureg.negatebase ? NEGATE_XYZW : NEGATE_NONE; reg->Abs = ureg.abs; - reg->NegateAbs = ureg.negateabs; } static void emit_dst( struct prog_dst_register *dst, diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index 35253daa2e..b47bf360cf 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -2669,7 +2669,7 @@ parse_vector_src_reg(GLcontext *ctx, const GLubyte **inst, reg->File = file; reg->Index = index; reg->Swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - reg->NegateBase = negateMask; + reg->Negate = negateMask; reg->RelAddr = isRelOffset; return 0; } @@ -2703,7 +2703,7 @@ parse_scalar_src_reg(GLcontext *ctx, const GLubyte **inst, reg->File = file; reg->Index = index; reg->Swizzle = (swizzle[0] << 0); - reg->NegateBase = negateMask; + reg->Negate = negateMask; reg->RelAddr = isRelOffset; return 0; } @@ -3019,7 +3019,7 @@ parse_fp_instruction (GLcontext * ctx, const GLubyte ** inst, parse_extended_swizzle_mask(inst, swizzle, &negateMask); fp->SrcReg[0].File = file; fp->SrcReg[0].Index = index; - fp->SrcReg[0].NegateBase = negateMask; + fp->SrcReg[0].Negate = negateMask; fp->SrcReg[0].Swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], @@ -3363,7 +3363,7 @@ parse_vp_instruction (GLcontext * ctx, const GLubyte ** inst, parse_extended_swizzle_mask (inst, swizzle, &negateMask); vp->SrcReg[0].File = file; vp->SrcReg[0].Index = index; - vp->SrcReg[0].NegateBase = negateMask; + vp->SrcReg[0].Negate = negateMask; vp->SrcReg[0].Swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], diff --git a/src/mesa/shader/nvfragparse.c b/src/mesa/shader/nvfragparse.c index 56b7c29bea..0fd55524ab 100644 --- a/src/mesa/shader/nvfragparse.c +++ b/src/mesa/shader/nvfragparse.c @@ -957,6 +957,7 @@ Parse_VectorSrc(struct parse_state *parseState, GLfloat sign = 1.0F; GLubyte token[100]; GLint idx; + GLuint negateBase, negateAbs; /* * First, take care of +/- and absolute value stuff. @@ -968,21 +969,23 @@ Parse_VectorSrc(struct parse_state *parseState, if (Parse_String(parseState, "|")) { srcReg->Abs = GL_TRUE; - srcReg->NegateAbs = (sign < 0.0F) ? GL_TRUE : GL_FALSE; + negateAbs = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE; if (Parse_String(parseState, "-")) - srcReg->NegateBase = NEGATE_XYZW; + negateBase = NEGATE_XYZW; else if (Parse_String(parseState, "+")) - srcReg->NegateBase = NEGATE_NONE; + negateBase = NEGATE_NONE; else - srcReg->NegateBase = NEGATE_NONE; + negateBase = NEGATE_NONE; } else { srcReg->Abs = GL_FALSE; - srcReg->NegateAbs = GL_FALSE; - srcReg->NegateBase = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE; + negateAbs = NEGATE_NONE; + negateBase = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE; } + srcReg->Negate = srcReg->Abs ? negateAbs : negateBase; + /* This should be the real src vector/register name */ if (!Peek_Token(parseState, token)) RETURN_ERROR; @@ -1083,6 +1086,7 @@ Parse_ScalarSrcReg(struct parse_state *parseState, GLfloat sign = 1.0F; GLboolean needSuffix = GL_TRUE; GLint idx; + GLuint negateBase, negateAbs; /* * First, take care of +/- and absolute value stuff. @@ -1094,21 +1098,23 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (Parse_String(parseState, "|")) { srcReg->Abs = GL_TRUE; - srcReg->NegateAbs = (sign < 0.0F) ? GL_TRUE : GL_FALSE; + negateAbs = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE; if (Parse_String(parseState, "-")) - srcReg->NegateBase = NEGATE_XYZW; + negateBase = NEGATE_XYZW; else if (Parse_String(parseState, "+")) - srcReg->NegateBase = NEGATE_NONE; + negateBase = NEGATE_NONE; else - srcReg->NegateBase = NEGATE_NONE; + negateBase = NEGATE_NONE; } else { srcReg->Abs = GL_FALSE; - srcReg->NegateAbs = GL_FALSE; - srcReg->NegateBase = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE; + negateAbs = NEGATE_NONE; + negateBase = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE; } + srcReg->Negate = srcReg->Abs ? negateAbs : negateBase; + if (!Peek_Token(parseState, token)) RETURN_ERROR; @@ -1247,9 +1253,8 @@ Parse_PrintInstruction(struct parse_state *parseState, } inst->SrcReg[0].Swizzle = SWIZZLE_NOOP; - inst->SrcReg[0].NegateBase = NEGATE_NONE; inst->SrcReg[0].Abs = GL_FALSE; - inst->SrcReg[0].NegateAbs = GL_FALSE; + inst->SrcReg[0].Negate = NEGATE_NONE; return GL_TRUE; } diff --git a/src/mesa/shader/nvvertparse.c b/src/mesa/shader/nvvertparse.c index 624262395b..f5e2df2670 100644 --- a/src/mesa/shader/nvvertparse.c +++ b/src/mesa/shader/nvvertparse.c @@ -641,12 +641,12 @@ Parse_SwizzleSrcReg(struct parse_state *parseState, struct prog_src_register *sr RETURN_ERROR; if (token[0] == '-') { (void) Parse_String(parseState, "-"); - srcReg->NegateBase = NEGATE_XYZW; + srcReg->Negate = NEGATE_XYZW; if (!Peek_Token(parseState, token)) RETURN_ERROR; } else { - srcReg->NegateBase = NEGATE_NONE; + srcReg->Negate = NEGATE_NONE; } /* Src reg can be R, c[n], c[n +/- offset], or a named vertex attrib */ @@ -734,13 +734,13 @@ Parse_ScalarSrcReg(struct parse_state *parseState, struct prog_src_register *src if (!Peek_Token(parseState, token)) RETURN_ERROR; if (token[0] == '-') { - srcReg->NegateBase = NEGATE_XYZW; + srcReg->Negate = NEGATE_XYZW; (void) Parse_String(parseState, "-"); /* consume '-' */ if (!Peek_Token(parseState, token)) RETURN_ERROR; } else { - srcReg->NegateBase = NEGATE_NONE; + srcReg->Negate = NEGATE_NONE; } /* Src reg can be R, c[n], c[n +/- offset], or a named vertex attrib */ @@ -1062,7 +1062,7 @@ Parse_PrintInstruction(struct parse_state *parseState, struct prog_instruction * RETURN_ERROR; srcReg->RelAddr = GL_FALSE; - srcReg->NegateBase = NEGATE_NONE; + srcReg->Negate = NEGATE_NONE; srcReg->Swizzle = SWIZZLE_NOOP; /* Register can be R, c[n], c[n +/- offset], a named vertex attrib, diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c index bdac1d4f8a..68a59350a1 100644 --- a/src/mesa/shader/prog_execute.c +++ b/src/mesa/shader/prog_execute.c @@ -212,19 +212,14 @@ fetch_vector4(const struct prog_src_register *source, result[3] = src[GET_SWZ(source->Swizzle, 3)]; } - if (source->NegateBase) { - result[0] = -result[0]; - result[1] = -result[1]; - result[2] = -result[2]; - result[3] = -result[3]; - } if (source->Abs) { result[0] = FABSF(result[0]); result[1] = FABSF(result[1]); result[2] = FABSF(result[2]); result[3] = FABSF(result[3]); } - if (source->NegateAbs) { + if (source->Negate) { + ASSERT(source->Negate == NEGATE_XYZW); result[0] = -result[0]; result[1] = -result[1]; result[2] = -result[2]; @@ -259,7 +254,7 @@ fetch_vector4ui(const struct prog_src_register *source, result[3] = src[GET_SWZ(source->Swizzle, 3)]; } - /* Note: no NegateBase, Abs, NegateAbs here */ + /* Note: no Negate or Abs here */ } @@ -299,19 +294,14 @@ fetch_vector4_deriv(GLcontext * ctx, result[2] = deriv[GET_SWZ(source->Swizzle, 2)]; result[3] = deriv[GET_SWZ(source->Swizzle, 3)]; - if (source->NegateBase) { - result[0] = -result[0]; - result[1] = -result[1]; - result[2] = -result[2]; - result[3] = -result[3]; - } if (source->Abs) { result[0] = FABSF(result[0]); result[1] = FABSF(result[1]); result[2] = FABSF(result[2]); result[3] = FABSF(result[3]); } - if (source->NegateAbs) { + if (source->Negate) { + ASSERT(source->Negate == NEGATE_XYZW); result[0] = -result[0]; result[1] = -result[1]; result[2] = -result[2]; @@ -336,13 +326,10 @@ fetch_vector1(const struct prog_src_register *source, result[0] = src[GET_SWZ(source->Swizzle, 0)]; - if (source->NegateBase) { - result[0] = -result[0]; - } if (source->Abs) { result[0] = FABSF(result[0]); } - if (source->NegateAbs) { + if (source->Negate) { result[0] = -result[0]; } } @@ -1514,7 +1501,7 @@ _mesa_execute_program(GLcontext * ctx, ASSERT(swz <= 3); result[i] = src[swz]; } - if (source->NegateBase & (1 << i)) + if (source->Negate & (1 << i)) result[i] = -result[i]; } store_vector4(inst, machine, result); diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h index 4adce11f95..3109f6cbae 100644 --- a/src/mesa/shader/prog_instruction.h +++ b/src/mesa/shader/prog_instruction.h @@ -261,37 +261,15 @@ struct prog_src_register GLuint Swizzle:12; GLuint RelAddr:1; - /** - * \name Source register "sign" control. - * - * The ARB and NV extensions allow varrying degrees of control over the - * sign of the source vector components. These values allow enough control - * for all flavors of the extensions. - */ - /*@{*/ - /** - * Per-component negation for the SWZ instruction. For non-SWZ - * instructions the only possible values are NEGATE_XYZW and NEGATE_NONE. - * - * \since - * ARB_vertex_program, ARB_fragment_program - */ - GLuint NegateBase:4; - - /** - * Take the component-wise absolute value. - * - * \since - * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2, - * NV_vertex_program2_option. - */ + /** Take the component-wise absolute value */ GLuint Abs:1; /** - * Post-absolute value negation (all components). + * Post-Abs negation. + * This will either be NEGATE_NONE or NEGATE_XYZW, except for the SWZ + * instruction which allows per-component negation. */ - GLuint NegateAbs:1; - /*@}*/ + GLuint Negate:4; }; diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c index b832ddb477..d73c619fea 100644 --- a/src/mesa/shader/prog_print.c +++ b/src/mesa/shader/prog_print.c @@ -325,19 +325,19 @@ reg_string(gl_register_file f, GLint index, gl_prog_print_mode mode, * \param extended if true, also allow 0, 1 values */ const char * -_mesa_swizzle_string(GLuint swizzle, GLuint negateBase, GLboolean extended) +_mesa_swizzle_string(GLuint swizzle, GLuint negateMask, GLboolean extended) { static const char swz[] = "xyzw01!?"; /* See SWIZZLE_x definitions */ static char s[20]; GLuint i = 0; - if (!extended && swizzle == SWIZZLE_NOOP && negateBase == 0) + if (!extended && swizzle == SWIZZLE_NOOP && negateMask == 0) return ""; /* no swizzle/negation */ if (!extended) s[i++] = '.'; - if (negateBase & NEGATE_X) + if (negateMask & NEGATE_X) s[i++] = '-'; s[i++] = swz[GET_SWZ(swizzle, 0)]; @@ -345,7 +345,7 @@ _mesa_swizzle_string(GLuint swizzle, GLuint negateBase, GLboolean extended) s[i++] = ','; } - if (negateBase & NEGATE_Y) + if (negateMask & NEGATE_Y) s[i++] = '-'; s[i++] = swz[GET_SWZ(swizzle, 1)]; @@ -353,7 +353,7 @@ _mesa_swizzle_string(GLuint swizzle, GLuint negateBase, GLboolean extended) s[i++] = ','; } - if (negateBase & NEGATE_Z) + if (negateMask & NEGATE_Z) s[i++] = '-'; s[i++] = swz[GET_SWZ(swizzle, 2)]; @@ -361,7 +361,7 @@ _mesa_swizzle_string(GLuint swizzle, GLuint negateBase, GLboolean extended) s[i++] = ','; } - if (negateBase & NEGATE_W) + if (negateMask & NEGATE_W) s[i++] = '-'; s[i++] = swz[GET_SWZ(swizzle, 3)]; @@ -465,14 +465,14 @@ fprint_src_reg(FILE *f, reg_string((gl_register_file) srcReg->File, srcReg->Index, mode, srcReg->RelAddr, prog), _mesa_swizzle_string(srcReg->Swizzle, - srcReg->NegateBase, GL_FALSE), + srcReg->Negate, GL_FALSE), abs); #if 0 _mesa_fprintf(f, "%s[%d]%s", file_string((gl_register_file) srcReg->File, mode), srcReg->Index, _mesa_swizzle_string(srcReg->Swizzle, - srcReg->NegateBase, GL_FALSE)); + srcReg->Negate, GL_FALSE)); #endif } @@ -566,7 +566,7 @@ _mesa_fprint_instruction_opt(FILE *f, mode), inst->SrcReg[0].Index, _mesa_swizzle_string(inst->SrcReg[0].Swizzle, - inst->SrcReg[0].NegateBase, GL_FALSE)); + inst->SrcReg[0].Negate, GL_FALSE)); } if (inst->Comment) _mesa_fprintf(f, " # %s", inst->Comment); @@ -583,7 +583,7 @@ _mesa_fprint_instruction_opt(FILE *f, mode), inst->SrcReg[0].Index, _mesa_swizzle_string(inst->SrcReg[0].Swizzle, - inst->SrcReg[0].NegateBase, GL_TRUE)); + inst->SrcReg[0].Negate, GL_TRUE)); fprint_comment(f, inst); break; case OPCODE_TEX: diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c index e283f8933b..ecd98dc85c 100644 --- a/src/mesa/shader/programopt.c +++ b/src/mesa/shader/programopt.c @@ -241,7 +241,7 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) inst->DstReg.WriteMask = WRITEMASK_X; inst->SrcReg[0].File = PROGRAM_TEMPORARY; inst->SrcReg[0].Index = fogFactorTemp; - inst->SrcReg[0].NegateBase = NEGATE_XYZW; + inst->SrcReg[0].Negate = NEGATE_XYZW; inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; inst->SaturateMode = SATURATE_ZERO_ONE; inst++; diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index 8493c490fb..3f455e0640 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -1135,7 +1135,7 @@ emit_negation(slang_emit_info *emitInfo, slang_ir_node *n) n->Children[0]->Store, NULL, NULL); - inst->SrcReg[0].NegateBase = NEGATE_XYZW; + inst->SrcReg[0].Negate = NEGATE_XYZW; return inst; } diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 3b2ad00f5c..fa4f4082a7 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -147,7 +147,7 @@ make_bitmap_fragment_program(GLcontext *ctx, GLuint samplerIndex) p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX; p->Instructions[ic].SrcReg[0].Index = 0; - p->Instructions[ic].SrcReg[0].NegateBase = NEGATE_XYZW; + p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW; ic++; /* END; */ diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index ffa607dd87..43c9afccc3 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -275,8 +275,8 @@ compile_instruction( /* swizzle (ext swizzle also depends on negation) */ { GLuint swz[4]; - GLboolean extended = (inst->SrcReg[i].NegateBase != NEGATE_NONE && - inst->SrcReg[i].NegateBase != NEGATE_XYZW); + GLboolean extended = (inst->SrcReg[i].Negate != NEGATE_NONE && + inst->SrcReg[i].Negate != NEGATE_XYZW); for( j = 0; j < 4; j++ ) { swz[j] = GET_SWZ( inst->SrcReg[i].Swizzle, j ); if (swz[j] > SWIZZLE_W) @@ -296,20 +296,20 @@ compile_instruction( } } - if( inst->SrcReg[i].NegateBase == NEGATE_XYZW ) { + if( inst->SrcReg[i].Negate == NEGATE_XYZW ) { fullsrc->SrcRegister.Negate = 1; } - else if( inst->SrcReg[i].NegateBase != NEGATE_NONE ) { - if( inst->SrcReg[i].NegateBase & NEGATE_X ) { + else if( inst->SrcReg[i].Negate != NEGATE_NONE ) { + if( inst->SrcReg[i].Negate & NEGATE_X ) { fullsrc->SrcRegisterExtSwz.NegateX = 1; } - if( inst->SrcReg[i].NegateBase & NEGATE_Y ) { + if( inst->SrcReg[i].Negate & NEGATE_Y ) { fullsrc->SrcRegisterExtSwz.NegateY = 1; } - if( inst->SrcReg[i].NegateBase & NEGATE_Z ) { + if( inst->SrcReg[i].Negate & NEGATE_Z ) { fullsrc->SrcRegisterExtSwz.NegateZ = 1; } - if( inst->SrcReg[i].NegateBase & NEGATE_W ) { + if( inst->SrcReg[i].Negate & NEGATE_W ) { fullsrc->SrcRegisterExtSwz.NegateW = 1; } } @@ -318,10 +318,6 @@ compile_instruction( fullsrc->SrcRegisterExtMod.Absolute = 1; } - if( inst->SrcReg[i].NegateAbs ) { - fullsrc->SrcRegisterExtMod.Negate = 1; - } - if( inst->SrcReg[i].RelAddr ) { fullsrc->SrcRegister.Indirect = 1; -- cgit v1.2.3 From d7d5c97a215e6845ffa9fc60cee52da6a2d3148a Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Thu, 16 Apr 2009 17:50:13 +0200 Subject: r300: cleanup frag prog setup a little Use proper fields for marking if fp is translated, and if is translated succesfully. Now if fp gets translated (even unsuccesfully) fp->translated is true. If the translation failed (i.e. because we exceeded limit of maximum texture indirections) the fp->error is set. With a little updated fallback function it prevents non native fragment programs from beeing translated with every frame (the translation would fail anyway so there's no point to try again). Also implement IsProgramNative function for GL_FRAGMENT_PROGRAM_ARB (it should give some performance boost in apps that checks if program is native and falls back to simpler shader to meet hw limits if necessary) and cleanup indentation (remove whitespaces on empty lines). --- src/mesa/drivers/dri/r300/r300_fragprog.c | 6 ++-- src/mesa/drivers/dri/r300/r300_render.c | 32 +++++++++---------- src/mesa/drivers/dri/r300/r300_shader.c | 21 ++++++++++++- src/mesa/drivers/dri/r300/r300_state.c | 52 +++++++++++++++---------------- src/mesa/drivers/dri/r300/r500_fragprog.c | 7 +++-- 5 files changed, 70 insertions(+), 48 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 32182bb667..f2d7cec5d3 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -175,7 +175,7 @@ static GLboolean transform_TEX( inst.SrcReg[0].File = PROGRAM_TEMPORARY; inst.SrcReg[0].Index = tmpreg; } - + tgt = radeonAppendInstructions(t->Program, 1); _mesa_copy_instructions(tgt, &inst, 1); @@ -466,8 +466,8 @@ void r300TranslateFragmentShader(r300ContextPtr r300, _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL); - if (!fp->error) - fp->translated = GL_TRUE; + fp->translated = GL_TRUE; + if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) r300FragmentProgramDump(fp, &fp->code); r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 924305dd12..d33396e150 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -194,7 +194,7 @@ static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) ((vertex_count + 0) << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - + if (!rmesa->radeon.radeonScreen->kernel_mm) { OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | @@ -224,12 +224,12 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) uint32_t voffset; int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - + if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, offset); - + if (!rmesa->radeon.radeonScreen->kernel_mm) { BEGIN_BATCH(sz+2+(nr * 2)); OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); @@ -240,7 +240,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) (rmesa->radeon.tcl.aos[i].stride << 8) | (rmesa->radeon.tcl.aos[i + 1].components << 16) | (rmesa->radeon.tcl.aos[i + 1].stride << 24)); - + voffset = rmesa->radeon.tcl.aos[i + 0].offset + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; OUT_BATCH_RELOC(voffset, @@ -256,7 +256,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) RADEON_GEM_DOMAIN_GTT, 0, 0); } - + if (nr & 1) { OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); @@ -280,7 +280,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) (rmesa->radeon.tcl.aos[i].stride << 8) | (rmesa->radeon.tcl.aos[i + 1].components << 16) | (rmesa->radeon.tcl.aos[i + 1].stride << 24)); - + voffset = rmesa->radeon.tcl.aos[i + 0].offset + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; OUT_BATCH(voffset); @@ -288,7 +288,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; OUT_BATCH(voffset); } - + if (nr & 1) { OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); @@ -427,7 +427,7 @@ static int r300Fallback(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); const unsigned back = ctx->Stencil._BackFace; - + FALLBACK_IF(r300->radeon.Fallback); /* Do we need to use new-style shaders? * Also is there a better way to do this? */ @@ -435,19 +435,19 @@ static int r300Fallback(GLcontext * ctx) struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; if (fp) { - if (!fp->translated) { + if (!fp->translated) r500TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); - } + + FALLBACK_IF(fp->error); } } else { struct r300_fragment_program *fp = (struct r300_fragment_program *) (char *)ctx->FragmentProgram._Current; if (fp) { - if (!fp->translated) { + if (!fp->translated) r300TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); - } + + FALLBACK_IF(fp->error); } } @@ -492,7 +492,7 @@ static GLboolean r300RunNonTCLRender(GLcontext * ctx, if (!r300ValidateBuffers(ctx)) return GL_TRUE; - + return r300RunRender(ctx, stage); } @@ -517,7 +517,7 @@ static GLboolean r300RunTCLRender(GLcontext * ctx, if (!r300ValidateBuffers(ctx)) return GL_TRUE; - + r300UpdateShaders(rmesa); vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index f30fd986e0..d90658ba47 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -81,7 +81,26 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { - return GL_TRUE; + if (target == GL_FRAGMENT_PROGRAM_ARB) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; + + if (!r500_fp->translated) + r500TranslateFragmentShader(rmesa, r500_fp); + + return !r500_fp->error; + } else { + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; + + if (!r300_fp->translated) + r300TranslateFragmentShader(rmesa, r300_fp); + + return !r300_fp->error; + } + } else + return GL_TRUE; } void r300InitShaderFuncs(struct dd_function_table *functions) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index f464335422..64ec870976 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2301,7 +2301,7 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, } -static void r300SetupPixelShader(r300ContextPtr rmesa) +static GLboolean r300SetupPixelShader(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; struct r300_fragment_program *fp = (struct r300_fragment_program *) @@ -2309,15 +2309,12 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) struct r300_fragment_program_code *code; int i, k; - if (!fp) /* should only happenen once, just after context is created */ - return; - r300TranslateFragmentShader(rmesa, fp); - if (!fp->translated) { - fprintf(stderr, "%s: No valid fragment shader, exiting\n", - __FUNCTION__); - return; - } + + /* Program is not native, fallback to software */ + if (fp->error) + return GL_FALSE; + code = &fp->code; r300SetupTextures(ctx); @@ -2369,6 +2366,8 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]); } + + return GL_TRUE; } #define bump_r500fp_count(ptr, new_count) do{\ @@ -2385,7 +2384,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ } while(0) -static void r500SetupPixelShader(r300ContextPtr rmesa) +static GLboolean r500SetupPixelShader(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; struct r500_fragment_program *fp = (struct r500_fragment_program *) @@ -2393,18 +2392,15 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) int i; struct r500_fragment_program_code *code; - if (!fp) /* should only happenen once, just after context is created */ - return; - ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; r500TranslateFragmentShader(rmesa, fp); - if (!fp->translated) { - fprintf(stderr, "%s: No valid fragment shader, exiting\n", - __FUNCTION__); - return; - } + + /* Program is not native, fallback to software */ + if (fp->error) + return GL_FALSE; + code = &fp->code; r300SetupTextures(ctx); @@ -2445,6 +2441,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) } bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); + return GL_TRUE; } void r300UpdateShaderStates(r300ContextPtr rmesa) @@ -2452,6 +2449,10 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) GLcontext *ctx; ctx = rmesa->radeon.glCtx; + /* should only happenen once, just after context is created */ + if (!ctx->FragmentProgram._Current) + return; + r300SetEarlyZState(ctx); /* w_fmt value is set to get best performance @@ -2475,19 +2476,18 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; } - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r500SetupPixelShader(rmesa); - else - r300SetupPixelShader(rmesa); - - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (!r500SetupPixelShader(rmesa)) + return; r500SetupRSUnit(ctx); - else + } else { + if (!r300SetupPixelShader(rmesa)) + return; r300SetupRSUnit(ctx); + } if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) r300SetupVertexProgram(rmesa); - } /** diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 07a2a7b17c..1b8343ab21 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -493,7 +493,10 @@ void r500TranslateFragmentShader(r300ContextPtr r300, _mesa_print_program(compiler.program); } - fp->translated = r500FragmentProgramEmit(&compiler); + if (!r500FragmentProgramEmit(&compiler)) + fp->error = GL_TRUE; + + fp->translated = GL_TRUE; /* Subtle: Rescue any parameters that have been added during transformations */ _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); @@ -505,7 +508,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); if (RADEON_DEBUG & DEBUG_PIXEL) { - if (fp->translated) { + if (!fp->error) { _mesa_printf("Machine-readable code:\n"); dump_program(&fp->code); } -- cgit v1.2.3 From 27d4546f600cb444f07a4d510a328540ff37f761 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 18 Apr 2009 02:42:35 +0200 Subject: r300: r300/r500 fp shader merge WIP --- src/mesa/drivers/dri/r300/r300_context.c | 1 + src/mesa/drivers/dri/r300/r300_context.h | 9 ++++++ src/mesa/drivers/dri/r300/r300_fragprog.c | 46 +++++++++++++-------------- src/mesa/drivers/dri/r300/r300_fragprog.h | 4 +-- src/mesa/drivers/dri/r300/r300_render.c | 10 +++--- src/mesa/drivers/dri/r300/r300_shader.c | 9 +++--- src/mesa/drivers/dri/r300/r300_state.c | 53 +++++++++++++++++-------------- src/mesa/drivers/dri/r300/r500_fragprog.c | 44 ++++++++++++------------- src/mesa/drivers/dri/r300/r500_fragprog.h | 3 +- 9 files changed, 94 insertions(+), 85 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index f16e5486f6..10836bb16a 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -392,6 +392,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, radeonInitSpanFuncs( ctx ); r300InitCmdBuf(r300); r300InitState(r300); + r300InitShaderFunctions(r300); if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) r300InitSwtcl(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index c3d91187a7..8d0f95e31e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -641,6 +641,13 @@ struct r300_swtcl_info { int sw_tcl_inputs[VERT_ATTRIB_MAX]; }; +struct r300_vtable { + void (* SetupRSUnit)(GLcontext *ctx); + void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); + void ( *TranslateFragmentShader)(GLcontext *ctx, struct gl_fragment_program *fp); + GLboolean (* SetupPixelShader)(GLcontext *ctx); +}; + /** * \brief R300 context structure. @@ -648,6 +655,8 @@ struct r300_swtcl_info { struct r300_context { struct radeon_context radeon; /* parent class, must be first */ + struct r300_vtable vtbl; + struct r300_hw_state hw; struct r300_vertex_shader_state vertex_shader; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index f2d7cec5d3..2c3abb216b 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -401,25 +401,26 @@ static void build_state( } -void r300TranslateFragmentShader(r300ContextPtr r300, - struct r300_fragment_program *fp) +void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) { + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; struct r300_fragment_program_external_state state; - build_state(r300, fp, &state); - if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + build_state(r300, r300_fp, &state); + if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { /* TODO: cache compiled programs */ - fp->translated = GL_FALSE; - _mesa_memcpy(&fp->state, &state, sizeof(state)); + r300_fp->translated = GL_FALSE; + _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); } - if (!fp->translated) { + if (!r300_fp->translated) { struct r300_fragment_program_compiler compiler; compiler.r300 = r300; - compiler.fp = fp; - compiler.code = &fp->code; - compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); + compiler.fp = r300_fp; + compiler.code = &r300_fp->code; + compiler.program = _mesa_clone_program(ctx, &fp->Base); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Fragment Program: Initial program:\n"); @@ -433,10 +434,7 @@ void r300TranslateFragmentShader(r300ContextPtr r300, { &radeonTransformALU, 0 }, { &radeonTransformTrigSimple, 0 } }; - radeonLocalTransform( - r300->radeon.glCtx, - compiler.program, - 3, transformations); + radeonLocalTransform(ctx, compiler.program, 3, transformations); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Fragment Program: After native rewrite:\n"); @@ -449,7 +447,7 @@ void r300TranslateFragmentShader(r300ContextPtr r300, .BuildSwizzle = &r300FPBuildSwizzle, .RewriteDepthOut = GL_TRUE }; - radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); + radeonNqssaDce(ctx, compiler.program, &nqssadce); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Compiler: after NqSSA-DCE:\n"); @@ -457,23 +455,23 @@ void r300TranslateFragmentShader(r300ContextPtr r300, } if (!r300FragmentProgramEmit(&compiler)) - fp->error = GL_TRUE; + r300_fp->error = GL_TRUE; /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); - fp->mesa_program.Base.Parameters = compiler.program->Parameters; + _mesa_free_parameter_list(fp->Base.Parameters); + fp->Base.Parameters = compiler.program->Parameters; compiler.program->Parameters = 0; - _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL); + _mesa_reference_program(ctx, &compiler.program, NULL); - fp->translated = GL_TRUE; + r300_fp->translated = GL_TRUE; - if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300FragmentProgramDump(fp, &fp->code); - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); + if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300FragmentProgramDump(r300_fp, &r300_fp->code); + r300UpdateStateParameters(ctx, _NEW_PROGRAM); } - update_params(r300, fp); + update_params(r300, r300_fp); } /* just some random things... */ diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 94fb554fb3..e1976277de 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -107,9 +107,7 @@ struct r300_fragment_program; -extern void r300TranslateFragmentShader(r300ContextPtr r300, - struct r300_fragment_program *fp); - +extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); /** * Used internally by the r300 fragment program code to store compile-time diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index d33396e150..ce333b8099 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -432,20 +432,18 @@ static int r300Fallback(GLcontext * ctx) /* Do we need to use new-style shaders? * Also is there a better way to do this? */ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct r500_fragment_program *fp = (struct r500_fragment_program *) - (char *)ctx->FragmentProgram._Current; + struct r500_fragment_program *fp = (struct r500_fragment_program *) ctx->FragmentProgram._Current; if (fp) { if (!fp->translated) - r500TranslateFragmentShader(r300, fp); + r300->vtbl.TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); FALLBACK_IF(fp->error); } } else { - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; if (fp) { if (!fp->translated) - r300TranslateFragmentShader(r300, fp); + r300->vtbl.TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); FALLBACK_IF(fp->error); } diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index d90658ba47..ef0b5d037f 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -83,19 +83,20 @@ r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_fragment_program * fp = (struct gl_fragment_program *) prog; if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; + struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)fp; if (!r500_fp->translated) - r500TranslateFragmentShader(rmesa, r500_fp); + rmesa->vtbl.TranslateFragmentShader(ctx, fp); return !r500_fp->error; } else { - struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; if (!r300_fp->translated) - r300TranslateFragmentShader(rmesa, r300_fp); + rmesa->vtbl.TranslateFragmentShader(ctx, fp); return !r300_fp->error; } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 86b85d525f..09f83f3d12 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1399,9 +1399,8 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); } - r300SetupFragmentShaderTextures(ctx, tmu_mappings); - } else - r500SetupFragmentShaderTextures(ctx, tmu_mappings); + } + r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings); if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", @@ -2300,16 +2299,13 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, } -static GLboolean r300SetupPixelShader(r300ContextPtr rmesa) +static GLboolean r300SetupPixelShader(GLcontext *ctx) { - GLcontext *ctx = rmesa->radeon.glCtx; - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; struct r300_fragment_program_code *code; int i, k; - r300TranslateFragmentShader(rmesa, fp); - /* Program is not native, fallback to software */ if (fp->error) return GL_FALSE; @@ -2383,19 +2379,16 @@ static GLboolean r300SetupPixelShader(r300ContextPtr rmesa) if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ } while(0) -static GLboolean r500SetupPixelShader(r300ContextPtr rmesa) +static GLboolean r500SetupPixelShader(GLcontext *ctx) { - GLcontext *ctx = rmesa->radeon.glCtx; - struct r500_fragment_program *fp = (struct r500_fragment_program *) - (char *)ctx->FragmentProgram._Current; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r500_fragment_program *fp = (struct r500_fragment_program *) ctx->FragmentProgram._Current; int i; struct r500_fragment_program_code *code; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; - r500TranslateFragmentShader(rmesa, fp); - /* Program is not native, fallback to software */ if (fp->error) return GL_FALSE; @@ -2475,15 +2468,12 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; } - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - if (!r500SetupPixelShader(rmesa)) - return; - r500SetupRSUnit(ctx); - } else { - if (!r300SetupPixelShader(rmesa)) - return; - r300SetupRSUnit(ctx); - } + rmesa->vtbl.TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); + + if (!rmesa->vtbl.SetupPixelShader(ctx)) + return; + + rmesa->vtbl.SetupRSUnit(ctx); if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) r300SetupVertexProgram(rmesa); @@ -2595,3 +2585,18 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->DrawBuffer = radeonDrawBuffer; functions->ReadBuffer = radeonReadBuffer; } + +void r300InitShaderFunctions(r300ContextPtr r300) +{ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r300->vtbl.SetupRSUnit = r500SetupRSUnit; + r300->vtbl.SetupPixelShader = r500SetupPixelShader; + r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; + r300->vtbl.TranslateFragmentShader = r500TranslateFragmentShader; + } else { + r300->vtbl.SetupRSUnit = r300SetupRSUnit; + r300->vtbl.SetupPixelShader = r300SetupPixelShader; + r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; + r300->vtbl.TranslateFragmentShader = r300TranslateFragmentShader; + } +} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 1b8343ab21..df507b674e 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -439,25 +439,26 @@ static void build_state( static void dump_program(struct r500_fragment_program_code *code); -void r500TranslateFragmentShader(r300ContextPtr r300, - struct r500_fragment_program *fp) +void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) { + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)fp; struct r500_fragment_program_external_state state; - build_state(r300, fp, &state); - if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + build_state(r300, r500_fp, &state); + if (_mesa_memcmp(&r500_fp->state, &state, sizeof(state))) { /* TODO: cache compiled programs */ - fp->translated = GL_FALSE; - _mesa_memcpy(&fp->state, &state, sizeof(state)); + r500_fp->translated = GL_FALSE; + _mesa_memcpy(&r500_fp->state, &state, sizeof(state)); } - if (!fp->translated) { + if (!r500_fp->translated) { struct r500_fragment_program_compiler compiler; compiler.r300 = r300; - compiler.fp = fp; - compiler.code = &fp->code; - compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); + compiler.fp = r500_fp; + compiler.code = &r500_fp->code; + compiler.program = _mesa_clone_program(ctx, &fp->Base); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Compiler: Initial program:\n"); @@ -472,8 +473,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, { &radeonTransformDeriv, 0 }, { &radeonTransformTrigScale, 0 } }; - radeonLocalTransform(r300->radeon.glCtx, compiler.program, - 4, transformations); + radeonLocalTransform(ctx, compiler.program, 4, transformations); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Compiler: after native rewrite:\n"); @@ -486,7 +486,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, .BuildSwizzle = &nqssadce_build_swizzle, .RewriteDepthOut = GL_TRUE }; - radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); + radeonNqssaDce(ctx, compiler.program, &nqssadce); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Compiler: after NqSSA-DCE:\n"); @@ -494,29 +494,29 @@ void r500TranslateFragmentShader(r300ContextPtr r300, } if (!r500FragmentProgramEmit(&compiler)) - fp->error = GL_TRUE; + r500_fp->error = GL_TRUE; - fp->translated = GL_TRUE; + r500_fp->translated = GL_TRUE; /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); - fp->mesa_program.Base.Parameters = compiler.program->Parameters; + _mesa_free_parameter_list(fp->Base.Parameters); + fp->Base.Parameters = compiler.program->Parameters; compiler.program->Parameters = 0; - _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); + _mesa_reference_program(ctx, &compiler.program, 0); - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); + r300UpdateStateParameters(ctx, _NEW_PROGRAM); if (RADEON_DEBUG & DEBUG_PIXEL) { - if (!fp->error) { + if (!r500_fp->error) { _mesa_printf("Machine-readable code:\n"); - dump_program(&fp->code); + dump_program(&r500_fp->code); } } } - update_params(r300, fp); + update_params(r300, r500_fp); } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 1e45538f80..1456f7f467 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -47,8 +47,7 @@ struct r500_fragment_program; -extern void r500TranslateFragmentShader(r300ContextPtr r300, - struct r500_fragment_program *fp); +extern void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); struct r500_fragment_program_compiler { r300ContextPtr r300; -- cgit v1.2.3 From aa04e7d475f6d6028c06c42bedc3c7d37ee78a0e Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 18 Apr 2009 03:16:16 +0200 Subject: r300: merge r300/r500 fragment program structures --- src/mesa/drivers/dri/r300/r300_context.h | 60 ++++++-------------------- src/mesa/drivers/dri/r300/r300_fragprog.c | 18 ++++---- src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 2 +- src/mesa/drivers/dri/r300/r300_render.c | 21 ++------- src/mesa/drivers/dri/r300/r300_shader.c | 55 +++++------------------ src/mesa/drivers/dri/r300/r300_state.c | 37 ++++++---------- src/mesa/drivers/dri/r300/r500_fragprog.c | 44 +++++++++---------- src/mesa/drivers/dri/r300/r500_fragprog.h | 2 +- 8 files changed, 70 insertions(+), 169 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 8d0f95e31e..0c7221b190 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -458,7 +458,7 @@ struct r300_vertex_program_cont { #define PFS_NUM_CONST_REGS 16 struct r300_pfs_compile_state; - +struct r500_pfs_compile_state; /** * Stores state that influences the compilation of a fragment program. @@ -528,47 +528,6 @@ struct r300_fragment_program_code { int max_temp_idx; }; -/** - * Store everything about a fragment program that is needed - * to render with that program. - */ -struct r300_fragment_program { - struct gl_fragment_program mesa_program; - - GLboolean translated; - GLboolean error; - - struct r300_fragment_program_external_state state; - struct r300_fragment_program_code code; - - GLboolean WritesDepth; - GLuint optimization; -}; - -struct r500_pfs_compile_state; - -struct r500_fragment_program_external_state { - struct { - /** - * If the sampler is used as a shadow sampler, - * this field is: - * 0 - GL_LUMINANCE - * 1 - GL_INTENSITY - * 2 - GL_ALPHA - * depending on the depth texture mode. - */ - GLuint depth_texture_mode : 2; - - /** - * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. if compare function is GL_LEQUAL, this field is 3] - * - * Otherwise, this field is 0. - */ - GLuint texture_compare_func : 3; - } unit[16]; -}; struct r500_fragment_program_code { struct { @@ -593,18 +552,23 @@ struct r500_fragment_program_code { int max_temp_idx; }; -struct r500_fragment_program { - struct gl_fragment_program mesa_program; +/** +* Store everything about a fragment program that is needed +* to render with that program. +*/ +struct r300_fragment_program { + struct gl_fragment_program Base; - GLcontext *ctx; GLboolean translated; GLboolean error; - struct r500_fragment_program_external_state state; - struct r500_fragment_program_code code; + struct r300_fragment_program_external_state state; + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; GLboolean writes_depth; - GLuint optimization; }; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 2c3abb216b..30f1bac72e 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -247,13 +247,11 @@ static GLboolean transform_TEX( } -static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) +static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) { - struct gl_fragment_program *mp = &fp->mesa_program; - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); + if (fp->Base.Parameters) + _mesa_load_state_parameters(ctx, fp->Base.Parameters); } @@ -270,7 +268,7 @@ static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) */ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) { - GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; + GLuint InputsRead = compiler->fp->Base.Base.InputsRead; if (!(InputsRead & FRAG_BIT_WPOS)) return; @@ -391,7 +389,7 @@ static void build_state( _mesa_bzero(state, sizeof(*state)); for(unit = 0; unit < 16; ++unit) { - if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + if (fp->Base.Base.ShadowSamplers & (1 << unit)) { struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); @@ -419,7 +417,7 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) compiler.r300 = r300; compiler.fp = r300_fp; - compiler.code = &r300_fp->code; + compiler.code = &r300_fp->code.r300; compiler.program = _mesa_clone_program(ctx, &fp->Base); if (RADEON_DEBUG & DEBUG_PIXEL) { @@ -467,11 +465,11 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) r300_fp->translated = GL_TRUE; if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300FragmentProgramDump(r300_fp, &r300_fp->code); + r300FragmentProgramDump(r300_fp, &r300_fp->code.r300); r300UpdateStateParameters(ctx, _NEW_PROGRAM); } - update_params(r300, r300_fp); + update_params(ctx, fp); } /* just some random things... */ diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index 9f0b7e3534..690734a1eb 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -201,7 +201,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) if (inst->Alpha.DepthWriteMask) { code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; code->node[code->cur_node].flags |= R300_W_OUT; - c->fp->WritesDepth = GL_TRUE; + c->fp->writes_depth = GL_TRUE; } return GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index ce333b8099..91f58ade59 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -429,24 +429,11 @@ static int r300Fallback(GLcontext * ctx) const unsigned back = ctx->Stencil._BackFace; FALLBACK_IF(r300->radeon.Fallback); - /* Do we need to use new-style shaders? - * Also is there a better way to do this? */ - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct r500_fragment_program *fp = (struct r500_fragment_program *) ctx->FragmentProgram._Current; - if (fp) { - if (!fp->translated) - r300->vtbl.TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); - - FALLBACK_IF(fp->error); - } - } else { - struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - if (fp) { - if (!fp->translated) - r300->vtbl.TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); - FALLBACK_IF(fp->error); - } + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + if (fp && !fp->translated) { + r300->vtbl.TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); + FALLBACK_IF(fp->error); } FALLBACK_IF(ctx->RenderMode != GL_RENDER); diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index ef0b5d037f..68fd8cd21e 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -9,10 +9,8 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, GLuint id) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp; - struct r300_fragment_program *r300_fp; - struct r500_fragment_program *r500_fp; + struct r300_fragment_program *fp; switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: @@ -20,28 +18,12 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, vp = CALLOC_STRUCT(r300_vertex_program_cont); return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); - case GL_FRAGMENT_PROGRAM_ARB: - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - r500_fp = CALLOC_STRUCT(r500_fragment_program); - r500_fp->ctx = ctx; - return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, - target, id); - } else { - r300_fp = CALLOC_STRUCT(r300_fragment_program); - return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, - target, id); - } case GL_FRAGMENT_PROGRAM_NV: - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - r500_fp = CALLOC_STRUCT(r500_fragment_program); - return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, - target, id); - } else { - r300_fp = CALLOC_STRUCT(r300_fragment_program); - return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, - target, id); - } + case GL_FRAGMENT_PROGRAM_ARB: + fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &fp->Base, target, id); + default: _mesa_problem(ctx, "Bad target in r300NewProgram"); } @@ -57,20 +39,15 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) static void r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp = (void *)prog; struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; - struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; switch (target) { case GL_VERTEX_PROGRAM_ARB: vp->progs = NULL; break; case GL_FRAGMENT_PROGRAM_ARB: - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r500_fp->translated = GL_FALSE; - else - r300_fp->translated = GL_FALSE; + r300_fp->translated = GL_FALSE; break; } @@ -83,23 +60,11 @@ r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_fragment_program * fp = (struct gl_fragment_program *) prog; - - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)fp; - - if (!r500_fp->translated) - rmesa->vtbl.TranslateFragmentShader(ctx, fp); - - return !r500_fp->error; - } else { - struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; - - if (!r300_fp->translated) - rmesa->vtbl.TranslateFragmentShader(ctx, fp); + struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; + if (!fp->translated) + rmesa->vtbl.TranslateFragmentShader(ctx, &fp->Base); - return !r300_fp->error; - } + return !fp->error; } else return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 09f83f3d12..9304ffb342 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -449,18 +449,9 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { - r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; - return (fp && fp->WritesDepth); - } else { - struct r500_fragment_program* fp = - (struct r500_fragment_program*)(char*) - ctx->FragmentProgram._Current; - return (fp && fp->writes_depth); - } + return (fp && fp->writes_depth); } static void r300SetEarlyZState(GLcontext * ctx) @@ -1072,7 +1063,7 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) if (!fp) return; - paramList = fp->mesa_program.Base.Parameters; + paramList = fp->Base.Base.Parameters; if (!paramList) return; @@ -1191,9 +1182,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; - struct r300_fragment_program_code *code = &fp->code; + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code = &fp->code.r300; R300_STATECHANGE(r300, fpt); @@ -1234,9 +1224,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { int i; - struct r500_fragment_program *fp = (struct r500_fragment_program *) - (char *)ctx->FragmentProgram._Current; - struct r500_fragment_program_code *code = &fp->code; + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r500_fragment_program_code *code = &fp->code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -1391,7 +1380,7 @@ static void r300SetupTextures(GLcontext * ctx) return; if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - if (fp->mesa_program.UsesKill && last_hw_tmu < 0) { + if (fp->Base.UsesKill && last_hw_tmu < 0) { // The KILL operation requires the first texture unit // to be enabled. r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; @@ -2310,7 +2299,7 @@ static GLboolean r300SetupPixelShader(GLcontext *ctx) if (fp->error) return GL_FALSE; - code = &fp->code; + code = &fp->code.r300; r300SetupTextures(ctx); @@ -2355,7 +2344,7 @@ static GLboolean r300SetupPixelShader(GLcontext *ctx) rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); for (i = 0; i < code->const_nr; i++) { const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->mesa_program.Base, code->constant[i]); + &fp->Base.Base, code->constant[i]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); @@ -2382,7 +2371,7 @@ static GLboolean r300SetupPixelShader(GLcontext *ctx) static GLboolean r500SetupPixelShader(GLcontext *ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r500_fragment_program *fp = (struct r500_fragment_program *) ctx->FragmentProgram._Current; + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; int i; struct r500_fragment_program_code *code; @@ -2393,7 +2382,7 @@ static GLboolean r500SetupPixelShader(GLcontext *ctx) if (fp->error) return GL_FALSE; - code = &fp->code; + code = &fp->code.r500; r300SetupTextures(ctx); @@ -2425,7 +2414,7 @@ static GLboolean r500SetupPixelShader(GLcontext *ctx) R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < code->const_nr; i++) { const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->mesa_program.Base, code->constant[i]); + &fp->Base.Base, code->constant[i]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index df507b674e..f5804521ee 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -189,13 +189,11 @@ static GLboolean transform_TEX( } -static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp) +static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) { - struct gl_fragment_program *mp = &fp->mesa_program; - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); + if (fp->Base.Parameters) + _mesa_load_state_parameters(ctx, fp->Base.Parameters); } @@ -212,7 +210,7 @@ static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp) */ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) { - GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; + GLuint InputsRead = compiler->fp->Base.Base.InputsRead; if (!(InputsRead & FRAG_BIT_WPOS)) return; @@ -420,15 +418,15 @@ static GLuint build_func(GLuint comparefunc) */ static void build_state( r300ContextPtr r300, - struct r500_fragment_program *fp, - struct r500_fragment_program_external_state *state) + struct r300_fragment_program *fp, + struct r300_fragment_program_external_state *state) { int unit; _mesa_bzero(state, sizeof(*state)); for(unit = 0; unit < 16; ++unit) { - if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + if (fp->Base.Base.ShadowSamplers & (1 << unit)) { struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); @@ -442,22 +440,22 @@ static void dump_program(struct r500_fragment_program_code *code); void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)fp; - struct r500_fragment_program_external_state state; + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; + struct r300_fragment_program_external_state state; - build_state(r300, r500_fp, &state); - if (_mesa_memcmp(&r500_fp->state, &state, sizeof(state))) { + build_state(r300, r300_fp, &state); + if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { /* TODO: cache compiled programs */ - r500_fp->translated = GL_FALSE; - _mesa_memcpy(&r500_fp->state, &state, sizeof(state)); + r300_fp->translated = GL_FALSE; + _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); } - if (!r500_fp->translated) { + if (!r300_fp->translated) { struct r500_fragment_program_compiler compiler; compiler.r300 = r300; - compiler.fp = r500_fp; - compiler.code = &r500_fp->code; + compiler.fp = r300_fp; + compiler.code = &r300_fp->code.r500; compiler.program = _mesa_clone_program(ctx, &fp->Base); if (RADEON_DEBUG & DEBUG_PIXEL) { @@ -494,9 +492,9 @@ void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) } if (!r500FragmentProgramEmit(&compiler)) - r500_fp->error = GL_TRUE; + r300_fp->error = GL_TRUE; - r500_fp->translated = GL_TRUE; + r300_fp->translated = GL_TRUE; /* Subtle: Rescue any parameters that have been added during transformations */ _mesa_free_parameter_list(fp->Base.Parameters); @@ -508,15 +506,15 @@ void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) r300UpdateStateParameters(ctx, _NEW_PROGRAM); if (RADEON_DEBUG & DEBUG_PIXEL) { - if (!r500_fp->error) { + if (!r300_fp->error) { _mesa_printf("Machine-readable code:\n"); - dump_program(&r500_fp->code); + dump_program(&r300_fp->code.r500); } } } - update_params(r300, r500_fp); + update_params(ctx, fp); } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 1456f7f467..567a43cf61 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -51,7 +51,7 @@ extern void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_progr struct r500_fragment_program_compiler { r300ContextPtr r300; - struct r500_fragment_program *fp; + struct r300_fragment_program *fp; struct r500_fragment_program_code *code; struct gl_program *program; }; -- cgit v1.2.3 From 33af54af0da94e686ff6679d240a9ec246df3c7a Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 18 Apr 2009 03:34:21 +0200 Subject: r300: merge r300/r500 fragment program compiler structure --- src/mesa/drivers/dri/r300/r300_context.h | 12 +++++++++--- src/mesa/drivers/dri/r300/r300_fragprog.c | 2 +- src/mesa/drivers/dri/r300/r300_fragprog.h | 8 -------- src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 6 +++--- src/mesa/drivers/dri/r300/r300_state.c | 1 + src/mesa/drivers/dri/r300/r300_swtcl.c | 1 + src/mesa/drivers/dri/r300/r500_fragprog.c | 10 +++++----- src/mesa/drivers/dri/r300/r500_fragprog.h | 11 +---------- src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 8 ++++---- 9 files changed, 25 insertions(+), 34 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 0c7221b190..ff59ae7ecf 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_common.h" #include "main/mtypes.h" +#include "shader/prog_instruction.h" struct r300_context; typedef struct r300_context r300ContextRec; @@ -66,8 +67,6 @@ typedef struct r300_context *r300ContextPtr; } #include "r300_vertprog.h" -#include "r500_fragprog.h" - /* The blit width for texture uploads @@ -563,7 +562,7 @@ struct r300_fragment_program { GLboolean error; struct r300_fragment_program_external_state state; - union { + union rX00_fragment_program_code { struct r300_fragment_program_code r300; struct r500_fragment_program_code r500; } code; @@ -572,6 +571,13 @@ struct r300_fragment_program { GLuint optimization; }; +struct r300_fragment_program_compiler { + r300ContextPtr r300; + struct r300_fragment_program *fp; + union rX00_fragment_program_code *code; + struct gl_program *program; +}; + #define R300_MAX_AOS_ARRAYS 16 diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 30f1bac72e..eae4c46f69 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -417,7 +417,7 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) compiler.r300 = r300; compiler.fp = r300_fp; - compiler.code = &r300_fp->code.r300; + compiler.code = &r300_fp->code; compiler.program = _mesa_clone_program(ctx, &fp->Base); if (RADEON_DEBUG & DEBUG_PIXEL) { diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index e1976277de..631e40913c 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -105,20 +105,12 @@ #endif -struct r300_fragment_program; - extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); /** * Used internally by the r300 fragment program code to store compile-time * only data. */ -struct r300_fragment_program_compiler { - r300ContextPtr r300; - struct r300_fragment_program *fp; - struct r300_fragment_program_code *code; - struct gl_program *program; -}; extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index 690734a1eb..693d485de9 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -47,7 +47,7 @@ #define PROG_CODE \ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r300_fragment_program_code *code = c->code + struct r300_fragment_program_code *code = &c->code->r300 #define error(fmt, args...) do { \ fprintf(stderr, "%s::%s(): " fmt "\n", \ @@ -213,7 +213,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) */ static GLboolean finish_node(struct r300_fragment_program_compiler *c) { - struct r300_fragment_program_code *code = c->code; + struct r300_fragment_program_code *code = &c->code->r300; struct r300_fragment_program_node *node = &code->node[code->cur_node]; if (node->alu_end < 0) { @@ -327,7 +327,7 @@ static const struct radeon_pair_handler pair_handler = { */ GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler) { - struct r300_fragment_program_code *code = compiler->code; + struct r300_fragment_program_code *code = &compiler->code->r300; _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); code->node[0].alu_end = -1; diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 9304ffb342..493c4be6a0 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -61,6 +61,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_emit.h" #include "r300_fragprog.h" #include "r300_tex.h" +#include "r500_fragprog.h" #include "drirenderbuffer.h" diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 934e1e2243..256a2bb5cb 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -34,6 +34,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "tnl/t_pipeline.h" +#include "r300_state.h" #include "r300_swtcl.h" #include "r300_emit.h" #include "r300_tex.h" diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f5804521ee..526a0ea928 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -62,8 +62,8 @@ static GLboolean transform_TEX( struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data) { - struct r500_fragment_program_compiler *compiler = - (struct r500_fragment_program_compiler*)data; + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; struct prog_instruction inst = *orig_inst; struct prog_instruction* tgt; GLboolean destredirect = GL_FALSE; @@ -208,7 +208,7 @@ static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) * \todo if/when r5xx supports the radeon_program architecture, this is a * likely candidate for code sharing. */ -static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) { GLuint InputsRead = compiler->fp->Base.Base.InputsRead; @@ -451,11 +451,11 @@ void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) } if (!r300_fp->translated) { - struct r500_fragment_program_compiler compiler; + struct r300_fragment_program_compiler compiler; compiler.r300 = r300; compiler.fp = r300_fp; - compiler.code = &r300_fp->code.r500; + compiler.code = &r300_fp->code; compiler.program = _mesa_clone_program(ctx, &fp->Base); if (RADEON_DEBUG & DEBUG_PIXEL) { diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 567a43cf61..4e72ef7aac 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -45,17 +45,8 @@ #include "r300_state.h" #include "radeon_program.h" -struct r500_fragment_program; - extern void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); -struct r500_fragment_program_compiler { - r300ContextPtr r300; - struct r300_fragment_program *fp; - struct r500_fragment_program_code *code; - struct gl_program *program; -}; - -extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler); +extern GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); #endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 4631235f0d..d9f81004e8 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -49,8 +49,8 @@ #define PROG_CODE \ - struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \ - struct r500_fragment_program_code *code = c->code + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r500_fragment_program_code *code = &c->code->r500 #define error(fmt, args...) do { \ fprintf(stderr, "%s::%s(): " fmt "\n", \ @@ -299,9 +299,9 @@ static const struct radeon_pair_handler pair_handler = { .MaxHwTemps = 128 }; -GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler) +GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler) { - struct r500_fragment_program_code *code = compiler->code; + struct r500_fragment_program_code *code = &compiler->code->r500; _mesa_bzero(code, sizeof(*code)); code->max_temp_idx = 1; -- cgit v1.2.3 From 97104c255942ee781777818633ca6c17b4fea312 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 18 Apr 2009 03:44:36 +0200 Subject: r300: further r300/r500 merge preparation --- src/mesa/drivers/dri/r300/r300_context.h | 3 ++- src/mesa/drivers/dri/r300/r300_fragprog.c | 2 +- src/mesa/drivers/dri/r300/r300_state.c | 2 ++ src/mesa/drivers/dri/r300/r500_fragprog.c | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index ff59ae7ecf..41417f3122 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -614,7 +614,8 @@ struct r300_swtcl_info { struct r300_vtable { void (* SetupRSUnit)(GLcontext *ctx); void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); - void ( *TranslateFragmentShader)(GLcontext *ctx, struct gl_fragment_program *fp); + void (* TranslateFragmentShader)(GLcontext *ctx, struct gl_fragment_program *fp); + GLboolean (* FragmentProgramEmit)(struct r300_fragment_program_compiler *compiler); GLboolean (* SetupPixelShader)(GLcontext *ctx); }; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index eae4c46f69..d58b092122 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -452,7 +452,7 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) _mesa_print_program(compiler.program); } - if (!r300FragmentProgramEmit(&compiler)) + if (!r300->vtbl.FragmentProgramEmit(&compiler)) r300_fp->error = GL_TRUE; /* Subtle: Rescue any parameters that have been added during transformations */ diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 493c4be6a0..95380a2870 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2583,10 +2583,12 @@ void r300InitShaderFunctions(r300ContextPtr r300) r300->vtbl.SetupPixelShader = r500SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; r300->vtbl.TranslateFragmentShader = r500TranslateFragmentShader; + r300->vtbl.FragmentProgramEmit = r500FragmentProgramEmit; } else { r300->vtbl.SetupRSUnit = r300SetupRSUnit; r300->vtbl.SetupPixelShader = r300SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; r300->vtbl.TranslateFragmentShader = r300TranslateFragmentShader; + r300->vtbl.FragmentProgramEmit = r300FragmentProgramEmit; } } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 526a0ea928..3e21e0f497 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -491,7 +491,7 @@ void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) _mesa_print_program(compiler.program); } - if (!r500FragmentProgramEmit(&compiler)) + if (!r300->vtbl.FragmentProgramEmit(&compiler)) r300_fp->error = GL_TRUE; r300_fp->translated = GL_TRUE; -- cgit v1.2.3 From 155cc1647fb7ec488fb1d93ba68bc2523ffee381 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 18 Apr 2009 04:00:51 +0200 Subject: r300: more prepare for merge --- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_fragprog.c | 10 +++++----- src/mesa/drivers/dri/r300/r300_fragprog.h | 5 +---- src/mesa/drivers/dri/r300/r300_state.c | 2 ++ src/mesa/drivers/dri/r300/r500_fragprog.c | 21 +++++++-------------- src/mesa/drivers/dri/r300/r500_fragprog.h | 1 + 6 files changed, 17 insertions(+), 23 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 41417f3122..904218fde2 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -616,6 +616,7 @@ struct r300_vtable { void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); void (* TranslateFragmentShader)(GLcontext *ctx, struct gl_fragment_program *fp); GLboolean (* FragmentProgramEmit)(struct r300_fragment_program_compiler *compiler); + void (* FragmentProgramDump)(union rX00_fragment_program_code *code); GLboolean (* SetupPixelShader)(GLcontext *ctx); }; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index d58b092122..a8b885a4c6 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -464,19 +464,19 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) r300_fp->translated = GL_TRUE; - if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300FragmentProgramDump(r300_fp, &r300_fp->code.r300); r300UpdateStateParameters(ctx, _NEW_PROGRAM); + + if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300->vtbl.FragmentProgramDump(&r300_fp->code); } update_params(ctx, fp); } /* just some random things... */ -void r300FragmentProgramDump( - struct r300_fragment_program *fp, - struct r300_fragment_program_code *code) +void r300FragmentProgramDump(union rX00_fragment_program_code *c) { + struct r300_fragment_program_code *code = &c->r300; int n, i, j; static int pc = 0; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 631e40913c..5c2cb312af 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -114,9 +114,6 @@ extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_progr extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); - -extern void r300FragmentProgramDump( - struct r300_fragment_program *fp, - struct r300_fragment_program_code *code); +extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); #endif diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 95380a2870..64b462bc90 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2584,11 +2584,13 @@ void r300InitShaderFunctions(r300ContextPtr r300) r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; r300->vtbl.TranslateFragmentShader = r500TranslateFragmentShader; r300->vtbl.FragmentProgramEmit = r500FragmentProgramEmit; + r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; } else { r300->vtbl.SetupRSUnit = r300SetupRSUnit; r300->vtbl.SetupPixelShader = r300SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; r300->vtbl.TranslateFragmentShader = r300TranslateFragmentShader; r300->vtbl.FragmentProgramEmit = r300FragmentProgramEmit; + r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; } } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 3e21e0f497..3b45eee537 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -435,8 +435,6 @@ static void build_state( } } -static void dump_program(struct r500_fragment_program_code *code); - void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -494,24 +492,19 @@ void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) if (!r300->vtbl.FragmentProgramEmit(&compiler)) r300_fp->error = GL_TRUE; - r300_fp->translated = GL_TRUE; - /* Subtle: Rescue any parameters that have been added during transformations */ _mesa_free_parameter_list(fp->Base.Parameters); fp->Base.Parameters = compiler.program->Parameters; compiler.program->Parameters = 0; - _mesa_reference_program(ctx, &compiler.program, 0); + _mesa_reference_program(ctx, &compiler.program, NULL); - r300UpdateStateParameters(ctx, _NEW_PROGRAM); + r300_fp->translated = GL_TRUE; - if (RADEON_DEBUG & DEBUG_PIXEL) { - if (!r300_fp->error) { - _mesa_printf("Machine-readable code:\n"); - dump_program(&r300_fp->code.r500); - } - } + r300UpdateStateParameters(ctx, _NEW_PROGRAM); + if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300->vtbl.FragmentProgramDump(&r300_fp->code); } update_params(ctx, fp); @@ -615,9 +608,9 @@ static char *to_texop(int val) return NULL; } -static void dump_program(struct r500_fragment_program_code *code) +void r500FragmentProgramDump(union rX00_fragment_program_code *c) { - + struct r500_fragment_program_code *code = &c->r500; fprintf(stderr, "R500 Fragment Program:\n--------\n"); int n; diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 4e72ef7aac..5bda0d1d0d 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -49,4 +49,5 @@ extern void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_progr extern GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); +extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); #endif -- cgit v1.2.3 From 300661d12a1f0ab6c81b087a2ca8c4655abf7066 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 18 Apr 2009 12:39:13 +0200 Subject: r300: more r300/r500 unification reuse insert_WPOS_trailer function --- src/mesa/drivers/dri/r300/r300_fragprog.c | 4 +- src/mesa/drivers/dri/r300/r300_fragprog.h | 7 +- src/mesa/drivers/dri/r300/r500_fragprog.c | 107 ++---------------------------- src/mesa/drivers/dri/r300/r500_fragprog.h | 5 ++ 4 files changed, 13 insertions(+), 110 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index a8b885a4c6..13a1c300dd 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -263,10 +263,8 @@ static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) * All other code pieces that reference that input will be rewritten * to read from a newly allocated temporary. * - * \todo if/when r5xx supports the radeon_program architecture, this is a - * likely candidate for code sharing. */ -static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) { GLuint InputsRead = compiler->fp->Base.Base.InputsRead; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 5c2cb312af..08f6584383 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -105,12 +105,9 @@ #endif -extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); +extern void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler); -/** - * Used internally by the r300 fragment program code to store compile-time - * only data. - */ +extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 3b45eee537..eb93648e2f 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -29,6 +29,7 @@ #include "radeon_nqssadce.h" #include "radeon_program_alu.h" +#include "r300_fragprog.h" static void reset_srcreg(struct prog_src_register* reg) @@ -197,110 +198,13 @@ static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) } -/** - * Transform the program to support fragment.position. - * - * Introduce a small fragment at the start of the program that will be - * the only code that directly reads the FRAG_ATTRIB_WPOS input. - * All other code pieces that reference that input will be rewritten - * to read from a newly allocated temporary. - * - * \todo if/when r5xx supports the radeon_program architecture, this is a - * likely candidate for code sharing. - */ -static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) -{ - GLuint InputsRead = compiler->fp->Base.Base.InputsRead; - - if (!(InputsRead & FRAG_BIT_WPOS)) - return; - - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } -} - - static void nqssadce_init(struct nqssadce_state* s) { s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; } -static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) +GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) { GLuint relevant; int i; @@ -366,8 +270,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) * The only thing we *cannot* do in an ALU instruction is per-component * negation. Therefore, we split the MOV into two instructions when necessary. */ -static void nqssadce_build_swizzle(struct nqssadce_state *s, - struct prog_dst_register dst, struct prog_src_register src) +void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { struct prog_instruction *inst; GLuint negatebase[2] = { 0, 0 }; @@ -478,8 +381,8 @@ void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) struct radeon_nqssadce_descr nqssadce = { .Init = &nqssadce_init, - .IsNativeSwizzle = &is_native_swizzle, - .BuildSwizzle = &nqssadce_build_swizzle, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle, .RewriteDepthOut = GL_TRUE }; radeonNqssaDce(ctx, compiler.program, &nqssadce); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 5bda0d1d0d..2e14098f5d 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -44,10 +44,15 @@ #include "r300_context.h" #include "r300_state.h" #include "radeon_program.h" +#include "radeon_nqssadce.h" extern void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); extern GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); + +extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); + +extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); #endif -- cgit v1.2.3 From a2d49eeaebcb9d5869e6f6d57d0aa050a825d8b6 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 18 Apr 2009 13:35:43 +0200 Subject: r300: move common fp functions to seperate file --- src/mesa/drivers/dri/r300/Makefile | 1 + src/mesa/drivers/dri/r300/r300_context.h | 1 - src/mesa/drivers/dri/r300/r300_fragprog.c | 244 +------------------- src/mesa/drivers/dri/r300/r300_fragprog.h | 6 +- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 282 +++++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_fragprog_common.h | 35 +++ src/mesa/drivers/dri/r300/r300_render.c | 5 +- src/mesa/drivers/dri/r300/r300_shader.c | 31 ++- src/mesa/drivers/dri/r300/r300_state.c | 7 +- src/mesa/drivers/dri/r300/r500_fragprog.c | 132 +---------- src/mesa/drivers/dri/r300/r500_fragprog.h | 4 +- 11 files changed, 358 insertions(+), 390 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_common.c create mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_common.h (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 0dff9a1273..62715e3b50 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -48,6 +48,7 @@ DRIVER_SOURCES = \ radeon_program_pair.c \ radeon_nqssadce.c \ r300_vertprog.c \ + r300_fragprog_common.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ r300_fragprog_emit.c \ diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 904218fde2..949a3ca45a 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -614,7 +614,6 @@ struct r300_swtcl_info { struct r300_vtable { void (* SetupRSUnit)(GLcontext *ctx); void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); - void (* TranslateFragmentShader)(GLcontext *ctx, struct gl_fragment_program *fp); GLboolean (* FragmentProgramEmit)(struct r300_fragment_program_compiler *compiler); void (* FragmentProgramDump)(union rX00_fragment_program_code *code); GLboolean (* SetupPixelShader)(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 13a1c300dd..825246687f 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -25,21 +25,6 @@ * */ -/** - * \file - * - * Fragment program compiler. Perform transformations on the intermediate - * representation until the program is in a form where we can translate - * it more or less directly into machine-readable form. - * - * \author Ben Skeggs - * \author Jerome Glisse - */ - -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/prog_instruction.h" #include "shader/prog_parameter.h" #include "shader/prog_print.h" @@ -49,8 +34,6 @@ #include "r300_state.h" #include "radeon_nqssadce.h" -#include "radeon_program_alu.h" - static void reset_srcreg(struct prog_src_register* reg) { @@ -81,7 +64,7 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t * \todo If/when r5xx uses the radeon_program architecture, this can probably * be reused. */ -static GLboolean transform_TEX( +GLboolean r300_transform_TEX( struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data) { @@ -246,231 +229,6 @@ static GLboolean transform_TEX( return GL_TRUE; } - -static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) -{ - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (fp->Base.Parameters) - _mesa_load_state_parameters(ctx, fp->Base.Parameters); -} - - -/** - * Transform the program to support fragment.position. - * - * Introduce a small fragment at the start of the program that will be - * the only code that directly reads the FRAG_ATTRIB_WPOS input. - * All other code pieces that reference that input will be rewritten - * to read from a newly allocated temporary. - * - */ -void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) -{ - GLuint InputsRead = compiler->fp->Base.Base.InputsRead; - - if (!(InputsRead & FRAG_BIT_WPOS)) - return; - - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } -} - - -static void nqssadce_init(struct nqssadce_state* s) -{ - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; -} - - -static GLuint build_dtm(GLuint depthmode) -{ - switch(depthmode) { - default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; - } -} - -static GLuint build_func(GLuint comparefunc) -{ - return comparefunc - GL_NEVER; -} - - -/** - * Collect all external state that is relevant for compiling the given - * fragment program. - */ -static void build_state( - r300ContextPtr r300, - struct r300_fragment_program *fp, - struct r300_fragment_program_external_state *state) -{ - int unit; - - _mesa_bzero(state, sizeof(*state)); - - for(unit = 0; unit < 16; ++unit) { - if (fp->Base.Base.ShadowSamplers & (1 << unit)) { - struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); - state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); - } - } -} - - -void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; - struct r300_fragment_program_external_state state; - - build_state(r300, r300_fp, &state); - if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { - /* TODO: cache compiled programs */ - r300_fp->translated = GL_FALSE; - _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); - } - - if (!r300_fp->translated) { - struct r300_fragment_program_compiler compiler; - - compiler.r300 = r300; - compiler.fp = r300_fp; - compiler.code = &r300_fp->code; - compiler.program = _mesa_clone_program(ctx, &fp->Base); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); - } - - insert_WPOS_trailer(&compiler); - - struct radeon_program_transformation transformations[] = { - { &transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 3, transformations); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - } - - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - } - - if (!r300->vtbl.FragmentProgramEmit(&compiler)) - r300_fp->error = GL_TRUE; - - /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->Base.Parameters); - fp->Base.Parameters = compiler.program->Parameters; - compiler.program->Parameters = 0; - - _mesa_reference_program(ctx, &compiler.program, NULL); - - r300_fp->translated = GL_TRUE; - - r300UpdateStateParameters(ctx, _NEW_PROGRAM); - - if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300->vtbl.FragmentProgramDump(&r300_fp->code); - } - - update_params(ctx, fp); -} - /* just some random things... */ void r300FragmentProgramDump(union rX00_fragment_program_code *c) { diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 08f6584383..0713810ade 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -105,12 +105,10 @@ #endif -extern void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler); - -extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); - extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); +extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); + #endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c new file mode 100644 index 0000000000..953d920d1f --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -0,0 +1,282 @@ +/* + * Copyright (C) 2009 Maciej Cencora + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Fragment program compiler. Perform transformations on the intermediate + * representation until the program is in a form where we can translate + * it more or less directly into machine-readable form. + * + * \author Ben Skeggs + * \author Jerome Glisse + */ + +#include "r300_fragprog_common.h" + +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + +#include "radeon_program.h" +#include "radeon_program_alu.h" + +static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) +{ + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (fp->Base.Parameters) + _mesa_load_state_parameters(ctx, fp->Base.Parameters); +} + +static void nqssadce_init(struct nqssadce_state* s) +{ + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; +} + +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + */ +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->fp->Base.Base.InputsRead; + + if (!(InputsRead & FRAG_BIT_WPOS)) + return; + + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); + + _mesa_insert_instructions(compiler->program, 0, 3); + fpi = compiler->program->Instructions; + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->program->NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } + } + } +} + +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} + +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct r300_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->Base.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + +void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; + struct r300_fragment_program_external_state state; + + build_state(r300, r300_fp, &state); + if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + r300_fp->translated = GL_FALSE; + _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); + } + + if (!r300_fp->translated) { + struct r300_fragment_program_compiler compiler; + + compiler.r300 = r300; + compiler.fp = r300_fp; + compiler.code = &r300_fp->code; + compiler.program = _mesa_clone_program(ctx, &fp->Base); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(compiler.program); + } + + insert_WPOS_trailer(&compiler); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 3, transformations); + } + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + _mesa_print_program(compiler.program); + } + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(compiler.program); + } + + if (!r300->vtbl.FragmentProgramEmit(&compiler)) + r300_fp->error = GL_TRUE; + + /* Subtle: Rescue any parameters that have been added during transformations */ + _mesa_free_parameter_list(fp->Base.Parameters); + fp->Base.Parameters = compiler.program->Parameters; + compiler.program->Parameters = 0; + + _mesa_reference_program(ctx, &compiler.program, NULL); + + r300_fp->translated = GL_TRUE; + + r300UpdateStateParameters(ctx, _NEW_PROGRAM); + + if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300->vtbl.FragmentProgramDump(&r300_fp->code); + } + + update_params(ctx, fp); +} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h new file mode 100644 index 0000000000..85ea86fecb --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2009 Maciej Cencora + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_COMMON_H_ +#define __R300_FRAGPROG_COMMON_H_ + +#include "main/mtypes.h" + +extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 91f58ade59..41b5e30be4 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -72,7 +72,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_reg.h" #include "r300_tex.h" #include "r300_emit.h" -#include "r300_fragprog.h" +#include "r300_fragprog_common.h" + extern int future_hw_tcl_on; /** @@ -432,7 +433,7 @@ static int r300Fallback(GLcontext * ctx) struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; if (fp && !fp->translated) { - r300->vtbl.TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); + r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); FALLBACK_IF(fp->error); } diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 68fd8cd21e..0133b83796 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -1,10 +1,36 @@ +/* + * Copyright 2009 Maciej Cencora + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ #include "main/glheader.h" #include "shader/program.h" #include "tnl/tnl.h" #include "r300_context.h" -#include "r300_fragprog.h" +#include "r300_fragprog_common.h" static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, GLuint id) @@ -59,10 +85,9 @@ static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; if (!fp->translated) - rmesa->vtbl.TranslateFragmentShader(ctx, &fp->Base); + r300TranslateFragmentShader(ctx, &fp->Base); return !fp->error; } else diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 64b462bc90..4cbbfd49c9 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -59,8 +59,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" #include "r300_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_tex.h" +#include "r300_fragprog_common.h" +#include "r300_fragprog.h" #include "r500_fragprog.h" #include "drirenderbuffer.h" @@ -2458,7 +2459,7 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; } - rmesa->vtbl.TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); + r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); if (!rmesa->vtbl.SetupPixelShader(ctx)) return; @@ -2582,14 +2583,12 @@ void r300InitShaderFunctions(r300ContextPtr r300) r300->vtbl.SetupRSUnit = r500SetupRSUnit; r300->vtbl.SetupPixelShader = r500SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; - r300->vtbl.TranslateFragmentShader = r500TranslateFragmentShader; r300->vtbl.FragmentProgramEmit = r500FragmentProgramEmit; r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; } else { r300->vtbl.SetupRSUnit = r300SetupRSUnit; r300->vtbl.SetupPixelShader = r300SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; - r300->vtbl.TranslateFragmentShader = r300TranslateFragmentShader; r300->vtbl.FragmentProgramEmit = r300FragmentProgramEmit; r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index eb93648e2f..6de92effe6 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -31,7 +31,6 @@ #include "radeon_program_alu.h" #include "r300_fragprog.h" - static void reset_srcreg(struct prog_src_register* reg) { _mesa_bzero(reg, sizeof(*reg)); @@ -59,7 +58,7 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t * - introduce a temporary register when write masks are needed * */ -static GLboolean transform_TEX( +GLboolean r500_transform_TEX( struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data) { @@ -189,21 +188,6 @@ static GLboolean transform_TEX( return GL_TRUE; } - -static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) -{ - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (fp->Base.Parameters) - _mesa_load_state_parameters(ctx, fp->Base.Parameters); -} - - -static void nqssadce_init(struct nqssadce_state* s) -{ - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; -} - GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) { GLuint relevant; @@ -299,120 +283,6 @@ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, } } -static GLuint build_dtm(GLuint depthmode) -{ - switch(depthmode) { - default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; - } -} - -static GLuint build_func(GLuint comparefunc) -{ - return comparefunc - GL_NEVER; -} - - -/** - * Collect all external state that is relevant for compiling the given - * fragment program. - */ -static void build_state( - r300ContextPtr r300, - struct r300_fragment_program *fp, - struct r300_fragment_program_external_state *state) -{ - int unit; - - _mesa_bzero(state, sizeof(*state)); - - for(unit = 0; unit < 16; ++unit) { - if (fp->Base.Base.ShadowSamplers & (1 << unit)) { - struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); - state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); - } - } -} - -void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; - struct r300_fragment_program_external_state state; - - build_state(r300, r300_fp, &state); - if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { - /* TODO: cache compiled programs */ - r300_fp->translated = GL_FALSE; - _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); - } - - if (!r300_fp->translated) { - struct r300_fragment_program_compiler compiler; - - compiler.r300 = r300; - compiler.fp = r300_fp; - compiler.code = &r300_fp->code; - compiler.program = _mesa_clone_program(ctx, &fp->Base); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: Initial program:\n"); - _mesa_print_program(compiler.program); - } - - insert_WPOS_trailer(&compiler); - - struct radeon_program_transformation transformations[] = { - { &transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 4, transformations); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after native rewrite:\n"); - _mesa_print_program(compiler.program); - } - - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - } - - if (!r300->vtbl.FragmentProgramEmit(&compiler)) - r300_fp->error = GL_TRUE; - - /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->Base.Parameters); - fp->Base.Parameters = compiler.program->Parameters; - compiler.program->Parameters = 0; - - _mesa_reference_program(ctx, &compiler.program, NULL); - - r300_fp->translated = GL_TRUE; - - r300UpdateStateParameters(ctx, _NEW_PROGRAM); - - if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300->vtbl.FragmentProgramDump(&r300_fp->code); - } - - update_params(ctx, fp); - -} static char *toswiz(int swiz_val) { switch(swiz_val) { diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 2e14098f5d..c7e313774c 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -46,8 +46,6 @@ #include "radeon_program.h" #include "radeon_nqssadce.h" -extern void r500TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); - extern GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); @@ -55,4 +53,6 @@ extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); + +extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); #endif -- cgit v1.2.3 From 78878a13fe9cc5dea36c6427c99c7fe17391dbfb Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 18 Apr 2009 13:37:30 +0200 Subject: r300: cleanup includes --- src/mesa/drivers/dri/r300/r300_fragprog.c | 7 ++----- src/mesa/drivers/dri/r300/r300_fragprog.h | 3 --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 5 +++++ src/mesa/drivers/dri/r300/r500_fragprog.c | 4 ---- src/mesa/drivers/dri/r300/r500_fragprog.h | 8 +------- 5 files changed, 8 insertions(+), 19 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 825246687f..921ca33c75 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -25,15 +25,12 @@ * */ +#include "r300_fragprog.h" + #include "shader/prog_parameter.h" -#include "shader/prog_print.h" #include "r300_context.h" -#include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" -#include "r300_state.h" - -#include "radeon_nqssadce.h" static void reset_srcreg(struct prog_src_register* reg) { diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 0713810ade..affa022a5c 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -33,9 +33,6 @@ #ifndef __R300_FRAGPROG_H_ #define __R300_FRAGPROG_H_ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" #include "shader/program.h" #include "shader/prog_instruction.h" diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 953d920d1f..3d4bd5db21 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -38,6 +38,11 @@ #include "r300_fragprog_common.h" +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "r300_state.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 6de92effe6..e9c0d89dd4 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -27,10 +27,6 @@ #include "r500_fragprog.h" -#include "radeon_nqssadce.h" -#include "radeon_program_alu.h" -#include "r300_fragprog.h" - static void reset_srcreg(struct prog_src_register* reg) { _mesa_bzero(reg, sizeof(*reg)); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index c7e313774c..9ca2f9be51 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -33,17 +33,10 @@ #ifndef __R500_FRAGPROG_H_ #define __R500_FRAGPROG_H_ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" #include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/program.h" #include "shader/prog_instruction.h" #include "r300_context.h" -#include "r300_state.h" -#include "radeon_program.h" #include "radeon_nqssadce.h" extern GLboolean r500FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); @@ -55,4 +48,5 @@ extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register r extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); + #endif -- cgit v1.2.3 From b4ebd1c191e6760b334c35fa1df025ad129cc449 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Fri, 5 Jun 2009 18:00:58 +0200 Subject: r300: hw doesn't support saturation for tex instructions --- src/mesa/drivers/dri/r300/r300_fragprog.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index ea530fd00e..55c1cfe631 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -140,6 +140,8 @@ GLboolean r300_transform_TEX( inst.DstReg.Index = tempreg; inst.DstReg.WriteMask = WRITEMASK_XYZW; destredirect = GL_TRUE; + } else if (inst.SaturateMode) { + destredirect = GL_TRUE; } } @@ -219,6 +221,7 @@ GLboolean r300_transform_TEX( tgt->Opcode = OPCODE_MOV; tgt->DstReg = orig_inst->DstReg; + tgt->SaturateMode = inst.SaturateMode; tgt->SrcReg[0].File = PROGRAM_TEMPORARY; tgt->SrcReg[0].Index = inst.DstReg.Index; } -- cgit v1.2.3 From e5bed439be4fd7c3a349aedc4bff7eec4e4d363e Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Wed, 15 Jul 2009 17:36:42 +0200 Subject: r300: Detangle fragment program compiler from driver-specific structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is in preparation of sharing the fragment program compiler with Gallium: Compiler code is moved into its own directory and modified so that it no longer depends on driver structures. Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/Makefile | 22 +- src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 450 +++++++++ src/mesa/drivers/dri/r300/compiler/r300_fragprog.h | 49 + .../drivers/dri/r300/compiler/r300_fragprog_emit.c | 345 +++++++ .../dri/r300/compiler/r300_fragprog_swizzle.c | 225 +++++ .../dri/r300/compiler/r300_fragprog_swizzle.h | 42 + src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 315 ++++++ src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 482 ++++++++++ src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 52 + .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 329 +++++++ .../drivers/dri/r300/compiler/radeon_compiler.h | 162 ++++ .../drivers/dri/r300/compiler/radeon_nqssadce.c | 297 ++++++ .../drivers/dri/r300/compiler/radeon_nqssadce.h | 93 ++ .../drivers/dri/r300/compiler/radeon_program.c | 128 +++ .../drivers/dri/r300/compiler/radeon_program.h | 131 +++ .../drivers/dri/r300/compiler/radeon_program_alu.c | 635 +++++++++++++ .../drivers/dri/r300/compiler/radeon_program_alu.h | 53 ++ .../dri/r300/compiler/radeon_program_pair.c | 1004 ++++++++++++++++++++ .../dri/r300/compiler/radeon_program_pair.h | 126 +++ src/mesa/drivers/dri/r300/r300_context.h | 136 +-- src/mesa/drivers/dri/r300/r300_fragprog.c | 451 --------- src/mesa/drivers/dri/r300/r300_fragprog.h | 111 --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 280 +----- src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 344 ------- src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c | 225 ----- src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h | 42 - src/mesa/drivers/dri/r300/r300_ioctl.c | 63 +- src/mesa/drivers/dri/r300/r300_state.c | 16 +- src/mesa/drivers/dri/r300/r300_swtcl.c | 8 +- src/mesa/drivers/dri/r300/r300_vertprog.c | 14 +- src/mesa/drivers/dri/r300/r500_fragprog.c | 480 ---------- src/mesa/drivers/dri/r300/r500_fragprog.h | 52 - src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 327 ------- src/mesa/drivers/dri/r300/radeon_nqssadce.c | 297 ------ src/mesa/drivers/dri/r300/radeon_nqssadce.h | 93 -- src/mesa/drivers/dri/r300/radeon_program.c | 128 --- src/mesa/drivers/dri/r300/radeon_program.h | 131 --- src/mesa/drivers/dri/r300/radeon_program_alu.c | 635 ------------- src/mesa/drivers/dri/r300/radeon_program_alu.h | 53 -- src/mesa/drivers/dri/r300/radeon_program_pair.c | 1004 -------------------- src/mesa/drivers/dri/r300/radeon_program_pair.h | 126 --- src/mesa/drivers/dri/radeon/radeon_screen.c | 4 +- 42 files changed, 5026 insertions(+), 4934 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog.h create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h create mode 100644 src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r500_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r500_fragprog.h create mode 100644 src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_compiler.h create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program.h create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog.c delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog.h delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_emit.c delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h delete mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.c delete mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.h delete mode 100644 src/mesa/drivers/dri/r300/r500_fragprog_emit.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_nqssadce.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_nqssadce.h delete mode 100644 src/mesa/drivers/dri/r300/radeon_program.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_program.h delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_alu.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_alu.h delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_pair.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_pair.h (limited to 'src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 7460410ee6..3a7de6d5be 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -38,6 +38,18 @@ RADEON_COMMON_SOURCES = \ radeon_span.c \ radeon_fbo.c +RADEON_COMPILER_SOURCES = \ + compiler/radeon_nqssadce.c \ + compiler/radeon_program.c \ + compiler/radeon_program_alu.c \ + compiler/radeon_program_pair.c \ + compiler/r3xx_fragprog.c \ + compiler/r300_fragprog.c \ + compiler/r300_fragprog_swizzle.c \ + compiler/r300_fragprog_emit.c \ + compiler/r500_fragprog.c \ + compiler/r500_fragprog_emit.c \ + DRIVER_SOURCES = \ radeon_screen.c \ r300_context.c \ @@ -48,21 +60,13 @@ DRIVER_SOURCES = \ r300_render.c \ r300_tex.c \ r300_texstate.c \ - radeon_program.c \ - radeon_program_alu.c \ - radeon_program_pair.c \ - radeon_nqssadce.c \ r300_vertprog.c \ r300_fragprog_common.c \ - r300_fragprog.c \ - r300_fragprog_swizzle.c \ - r300_fragprog_emit.c \ - r500_fragprog.c \ - r500_fragprog_emit.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ $(RADEON_COMMON_SOURCES) \ + $(RADEON_COMPILER_SOURCES) \ $(EGL_SOURCES) \ $(CS_SOURCES) diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c new file mode 100644 index 0000000000..00ef964571 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -0,0 +1,450 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "compiler/r300_fragprog.h" + +#include "shader/prog_parameter.h" + +#include "r300_reg.h" + +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + +static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +{ + gl_state_index fail_value_tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 + }; + struct prog_src_register reg = { 0, }; + + fail_value_tokens[2] = tmu; + reg.File = PROGRAM_STATE_VAR; + reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + * \todo If/when r5xx uses the radeon_program architecture, this can probably + * be reused. + */ +GLboolean r300_transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = inst.DstReg; + if (comparefunc == GL_ALWAYS) { + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + } + return GL_TRUE; + } + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = radeonFindFreeTemporary(t); + inst.DstReg.WriteMask = WRITEMASK_XYZW; + } + + + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + + int tempreg = radeonFindFreeTemporary(t); + int factor_index; + + tokens[2] = inst.TexSrcUnit; + factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); + + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MUL; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_STATE_VAR; + tgt->SrcReg[1].Index = factor_index; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; + } + + if (inst.Opcode != OPCODE_KIL) { + if (inst.DstReg.File != PROGRAM_TEMPORARY || + inst.DstReg.WriteMask != WRITEMASK_XYZW) { + int tempreg = radeonFindFreeTemporary(t); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } else if (inst.SaturateMode) { + destredirect = GL_TRUE; + } + } + + if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { + int tmpreg = radeonFindFreeTemporary(t); + tgt = radeonAppendInstructions(t->Program, 1); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tmpreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } + + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode; + int rcptemp = radeonFindFreeTemporary(t); + int pass, fail; + + tgt = radeonAppendInstructions(t->Program, 3); + + tgt[0].Opcode = OPCODE_RCP; + tgt[0].DstReg.File = PROGRAM_TEMPORARY; + tgt[0].DstReg.Index = rcptemp; + tgt[0].DstReg.WriteMask = WRITEMASK_W; + tgt[0].SrcReg[0] = inst.SrcReg[0]; + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + + tgt[1].Opcode = OPCODE_MAD; + tgt[1].DstReg = inst.DstReg; + tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[1].SrcReg[0] = inst.SrcReg[0]; + tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[1].Index = rcptemp; + tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; + tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[2].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; + else + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; + + tgt[2].Opcode = OPCODE_CMP; + tgt[2].DstReg = orig_inst->DstReg; + tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; + tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; + tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); + } else if (destredirect) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = orig_inst->DstReg; + tgt->SaturateMode = inst.SaturateMode; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + } + + return GL_TRUE; +} + +/* just some random things... */ +void r300FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r300_fragment_program_code *code = &c->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n < (code->cur_node + 1); n++) { + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d, flags: %08x\n", n, + code->node[n].alu_offset, + code->node[n].tex_offset, + code->node[n].alu_end, code->node[n].tex_end, + code->node[n].flags); + + if (n > 0 || code->first_node_has_tex) { + fprintf(stderr, " TEX:\n"); + for (i = code->node[n].tex_offset; + i <= code->node[n].tex_offset + code->node[n].tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = code->node[n].alu_offset; + i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].inst1 >> (j * 6); + int rega = code->alu.inst[i].inst3 >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (code->alu.inst[i]. + inst1 >> R300_ALU_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + inst1 >> R300_ALU_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (code->alu.inst[i]. + inst3 >> R300_ALU_DSTA_SHIFT) & 31); + } + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + inst3 >> R300_ALU_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + code->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].inst3); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].inst0 >> (j * 7); + int rega = code->alu.inst[i].inst2 >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].inst0, arga[0], arga[1], + arga[2], code->alu.inst[i].inst2); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h new file mode 100644 index 0000000000..186fad6490 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + * Jerome Glisse + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_program.h" + + +extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c new file mode 100644 index 0000000000..1cfb565b6e --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -0,0 +1,345 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs + * + * \author Jerome Glisse + * + * \todo FogOption + */ + +#include "compiler/r300_fragprog.h" + +#include "r300_reg.h" + +#include "compiler/radeon_program_pair.h" +#include "compiler/r300_fragprog_swizzle.h" + + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r300_fragment_program_code *code = &c->code->code.r300 + +#define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex) +{ + PROG_CODE; + + for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { + if (code->constant[*hwindex].File == file && + code->constant[*hwindex].Index == index) + break; + } + + if (*hwindex >= code->const_nr) { + if (*hwindex >= R300_PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } + + code->const_nr++; + code->constant[*hwindex].File = file; + code->constant[*hwindex].Index = index; + } + + return GL_TRUE; +} + + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, GLuint index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + + +static GLuint translate_rgb_opcode(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTC_CMP; + case OPCODE_DP3: return R300_ALU_OUTC_DP3; + case OPCODE_DP4: return R300_ALU_OUTC_DP4; + case OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTC_MAD; + case OPCODE_MAX: return R300_ALU_OUTC_MAX; + case OPCODE_MIN: return R300_ALU_OUTC_MIN; + case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static GLuint translate_alpha_opcode(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTA_CMP; + case OPCODE_DP3: return R300_ALU_OUTA_DP4; + case OPCODE_DP4: return R300_ALU_OUTA_DP4; + case OPCODE_EX2: return R300_ALU_OUTA_EX2; + case OPCODE_FRC: return R300_ALU_OUTA_FRC; + case OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTA_MAD; + case OPCODE_MAX: return R300_ALU_OUTA_MAX; + case OPCODE_MIN: return R300_ALU_OUTA_MIN; + case OPCODE_RCP: return R300_ALU_OUTA_RCP; + case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction. + */ +static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) +{ + PROG_CODE; + + if (code->alu.length >= R300_PFS_MAX_ALU_INST) { + error("Too many ALU instructions"); + return GL_FALSE; + } + + int ip = code->alu.length++; + int j; + code->node[code->cur_node].alu_end++; + + code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode); + code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { + GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); + if (!inst->RGB.Src[j].Constant) + use_temporary(code, inst->RGB.Src[j].Index); + code->alu.inst[ip].inst1 |= src << (6*j); + + src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); + if (!inst->Alpha.Src[j].Constant) + use_temporary(code, inst->Alpha.Src[j].Index); + code->alu.inst[ip].inst3 |= src << (6*j); + + GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].inst0 |= arg << (7*j); + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].inst2 |= arg << (7*j); + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + code->alu.inst[ip].inst1 |= + (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + code->node[code->cur_node].flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + code->alu.inst[ip].inst3 |= + (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; + code->node[code->cur_node].flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; + code->node[code->cur_node].flags |= R300_W_OUT; + c->code->writes_depth = GL_TRUE; + } + + return GL_TRUE; +} + + +/** + * Finish the current node without advancing to the next one. + */ +static GLboolean finish_node(struct r300_fragment_program_compiler *c) +{ + struct r300_fragment_program_code *code = &c->code->code.r300; + struct r300_fragment_program_node *node = &code->node[code->cur_node]; + + if (node->alu_end < 0) { + /* Generate a single NOP for this node */ + struct radeon_pair_instruction inst; + _mesa_bzero(&inst, sizeof(inst)); + if (!emit_alu(c, &inst)) + return GL_FALSE; + } + + if (node->tex_end < 0) { + if (code->cur_node == 0) { + node->tex_end = 0; + } else { + error("Node %i has no TEX instructions", code->cur_node); + return GL_FALSE; + } + } else { + if (code->cur_node == 0) + code->first_node_has_tex = 1; + } + + return GL_TRUE; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection. + */ +static GLboolean begin_tex(void* data) +{ + PROG_CODE; + + if (code->cur_node == 0) { + if (code->node[0].alu_end < 0 && + code->node[0].tex_end < 0) + return GL_TRUE; + } + + if (code->cur_node == 3) { + error("Too many texture indirections"); + return GL_FALSE; + } + + if (!finish_node(c)) + return GL_FALSE; + + struct r300_fragment_program_node *node = &code->node[++code->cur_node]; + node->alu_offset = code->alu.length; + node->alu_end = -1; + node->tex_offset = code->tex.length; + node->tex_end = -1; + return GL_TRUE; +} + + +static GLboolean emit_tex(void* data, struct prog_instruction* inst) +{ + PROG_CODE; + + if (code->tex.length >= R300_PFS_MAX_TEX_INST) { + error("Too many TEX instructions"); + return GL_FALSE; + } + + GLuint unit = inst->TexSrcUnit; + GLuint dest = inst->DstReg.Index; + GLuint opcode; + + switch(inst->Opcode) { + case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %i", inst->Opcode); + return GL_FALSE; + } + + if (inst->Opcode == OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->SrcReg[0].Index); + + code->node[code->cur_node].tex_end++; + code->tex.inst[code->tex.length++] = + (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | + (dest << R300_DST_ADDR_SHIFT) | + (unit << R300_TEX_ID_SHIFT) | + (opcode << R300_TEX_INST_SHIFT); + return GL_TRUE; +} + + +static const struct radeon_pair_handler pair_handler = { + .EmitConst = &emit_const, + .EmitPaired = &emit_alu, + .EmitTex = &emit_tex, + .BeginTexBlock = &begin_tex, + .MaxHwTemps = R300_PFS_NUM_TEMP_REGS +}; + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +{ + struct r300_fragment_program_code *code = &compiler->code->code.r300; + + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); + code->node[0].alu_end = -1; + code->node[0].tex_end = -1; + + if (!radeonPairProgram(compiler->ctx, compiler->program, &pair_handler, compiler)) + return GL_FALSE; + + if (!finish_node(compiler)) + return GL_FALSE; + + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c new file mode 100644 index 0000000000..fc9d855bce --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r300_fragprog_swizzle.h" + +#include "r300_reg.h" +#include "radeon_nqssadce.h" + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) + +struct swizzle_data { + GLuint hash; /**< swizzle value this matches */ + GLuint base; /**< base value for hw swizzle */ + GLuint stride; /**< difference in base between arg0/1/2 */ +}; + +static const struct swizzle_data native_swizzles[] = { + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found. + */ +static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + GLuint swz = GET_SWZ(swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } + + return 0; +} + + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +{ + if (reg.Abs) + reg.Negate = NEGATE_NONE; + + if (opcode == OPCODE_KIL || + opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP) { + int j; + + if (reg.Abs || reg.Negate) + return GL_FALSE; + + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(reg.Swizzle, j); + if (swz == SWIZZLE_NIL) + continue; + if (swz != j) + return GL_FALSE; + } + + return GL_TRUE; + } + + GLuint relevant = 0; + int j; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) + relevant |= 1 << j; + + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return GL_FALSE; + + if (!lookup_native_swizzle(reg.Swizzle)) + return GL_FALSE; + + return GL_TRUE; +} + + +/** + * Generate MOV dst, src using only native swizzles. + */ +void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +{ + if (src.Abs) + src.Negate = NEGATE_NONE; + + while(dst.WriteMask) { + const struct swizzle_data *best_swizzle = 0; + GLuint best_matchcount = 0; + GLuint best_matchmask = 0; + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + GLuint matchcount = 0; + GLuint matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + if (!GET_BIT(dst.WriteMask, comp)) + continue; + GLuint swz = GET_SWZ(src.Swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_swizzle = sd; + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) + break; + } + } + + struct prog_instruction *inst; + + _mesa_insert_instructions(s->Program, s->IP, 1); + inst = s->Program->Instructions + s->IP++; + inst->Opcode = OPCODE_MOV; + inst->DstReg = dst; + inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); + inst->SrcReg[0] = src; + inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; + /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ + + dst.WriteMask &= ~inst->DstReg.WriteMask; + } +} + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd) { + _mesa_printf("Not a native swizzle: %08x\n", swizzle); + return 0; + } + + return sd->base + src*sd->stride; +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) +{ + if (swizzle < 3) + return swizzle + 3*src; + + switch(swizzle) { + case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + default: return R300_ALU_ARGA_ONE; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h new file mode 100644 index 0000000000..231bf4eef5 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_SWIZZLE_H_ +#define __R300_FRAGPROG_SWIZZLE_H_ + +#include "main/glheader.h" +#include "shader/prog_instruction.h" + +struct nqssadce_state; + +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); +void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c new file mode 100644 index 0000000000..75abdcfc42 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,315 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" + +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void nqssadce_init(struct nqssadce_state* s) +{ + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; +} + +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + */ +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->program->InputsRead; + + if (!(InputsRead & FRAG_BIT_WPOS)) { + compiler->code->wpos_attr = FRAG_ATTRIB_MAX; + return; + } + + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(InputsRead & (1 << i))) { + InputsRead &= ~(1 << FRAG_ATTRIB_WPOS); + InputsRead |= 1 << i; + compiler->program->InputsRead = InputsRead; + compiler->code->wpos_attr = i; + break; + } + } + + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); + + _mesa_insert_instructions(compiler->program, 0, 3); + fpi = compiler->program->Instructions; + i = 0; + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = compiler->code->wpos_attr; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = compiler->code->wpos_attr; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->program->NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } + } + } +} + + +/** + * Rewrite fragment.fogcoord to use a texture coordinate slot. + * Note that fogcoord is forced into an X001 pattern, and this enforcement + * is done here. + * + * See also the counterpart rewriting for vertex programs. + */ +static void rewriteFog(struct r300_fragment_program_compiler *compiler) +{ + struct rX00_fragment_program_code *code = compiler->code; + GLuint InputsRead = compiler->program->InputsRead; + int i; + + if (!(InputsRead & FRAG_BIT_FOGC)) { + code->fog_attr = FRAG_ATTRIB_MAX; + return; + } + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(InputsRead & (1 << i))) { + InputsRead &= ~(1 << FRAG_ATTRIB_FOGC); + InputsRead |= 1 << i; + compiler->program->InputsRead = InputsRead; + code->fog_attr = i; + break; + } + } + + { + struct prog_instruction *inst; + + inst = compiler->program->Instructions; + while (inst->Opcode != OPCODE_END) { + const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < src_regs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) { + inst->SrcReg[i].Index = code->fog_attr; + inst->SrcReg[i].Swizzle = combine_swizzles( + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE), + inst->SrcReg[i].Swizzle); + } + } + ++inst; + } + } +} + + +static void rewrite_depth_out(struct gl_program *prog) +{ + struct prog_instruction *inst; + + for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) { + if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH) + continue; + + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } + } +} + +GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + GLboolean success = GL_FALSE; + + if (c->debug) { + fflush(stdout); + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(c->program); + fflush(stdout); + } + + insert_WPOS_trailer(c); + + rewriteFog(c); + + rewrite_depth_out(c->program); + + if (c->is_r500) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(c->ctx, c->program, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(c->ctx, c->program, 3, transformations); + } + + if (c->debug) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + _mesa_print_program(c->program); + fflush(stdout); + } + + if (c->is_r500) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle + }; + radeonNqssaDce(c->ctx, c->program, &nqssadce); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle + }; + radeonNqssaDce(c->ctx, c->program, &nqssadce); + } + + if (c->debug) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(c->program); + fflush(stdout); + } + + if (c->is_r500) { + success = r500BuildFragmentProgramHwCode(c); + } else { + success = r300BuildFragmentProgramHwCode(c); + } + + if (!success || c->debug) { + if (c->is_r500) { + r500FragmentProgramDump(c->code); + } else { + r300FragmentProgramDump(c->code); + } + } + + return success; +} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c new file mode 100644 index 0000000000..fdc18caacb --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -0,0 +1,482 @@ +/* + * Copyright 2008 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "compiler/r500_fragprog.h" + +#include "r300_reg.h" + +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + +static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +{ + gl_state_index fail_value_tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 + }; + struct prog_src_register reg = { 0, }; + + fail_value_tokens[2] = tmu; + reg.File = PROGRAM_STATE_VAR; + reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + */ +GLboolean r500_transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = inst.DstReg; + if (comparefunc == GL_ALWAYS) { + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + } + return GL_TRUE; + } + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = radeonFindFreeTemporary(t); + inst.DstReg.WriteMask = WRITEMASK_XYZW; + } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) { + int tempreg = radeonFindFreeTemporary(t); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } + + if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { + int tmpreg = radeonFindFreeTemporary(t); + tgt = radeonAppendInstructions(t->Program, 1); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tmpreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } + + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode; + int rcptemp = radeonFindFreeTemporary(t); + int pass, fail; + + tgt = radeonAppendInstructions(t->Program, 3); + + tgt[0].Opcode = OPCODE_RCP; + tgt[0].DstReg.File = PROGRAM_TEMPORARY; + tgt[0].DstReg.Index = rcptemp; + tgt[0].DstReg.WriteMask = WRITEMASK_W; + tgt[0].SrcReg[0] = inst.SrcReg[0]; + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + + tgt[1].Opcode = OPCODE_MAD; + tgt[1].DstReg = inst.DstReg; + tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[1].SrcReg[0] = inst.SrcReg[0]; + tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[1].Index = rcptemp; + tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; + tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[2].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; + else + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; + + tgt[2].Opcode = OPCODE_CMP; + tgt[2].DstReg = orig_inst->DstReg; + tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; + tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; + tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); + } else if (destredirect) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + } + + return GL_TRUE; +} + +GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +{ + GLuint relevant; + int i; + + if (opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP || + opcode == OPCODE_KIL) { + if (reg.Abs) + return GL_FALSE; + + if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) + return GL_FALSE; + + if (reg.Negate) + reg.Negate ^= NEGATE_XYZW; + + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz == SWIZZLE_NIL) { + reg.Negate &= ~(1 << i); + continue; + } + if (swz >= 4) + return GL_FALSE; + } + + if (reg.Negate) + return GL_FALSE; + + return GL_TRUE; + } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... */ + if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) + return GL_TRUE; + + return GL_FALSE; + } else { + /* ALU instructions support almost everything */ + if (reg.Abs) + return GL_TRUE; + + relevant = 0; + for(i = 0; i < 3; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return GL_FALSE; + + return GL_TRUE; + } +} + +/** + * Implement a MOV with a potentially non-native swizzle. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. Therefore, we split the MOV into two instructions when necessary. + */ +void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +{ + struct prog_instruction *inst; + GLuint negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(src.Swizzle, i); + if (swz == SWIZZLE_NIL) + continue; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; + } + + _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); + inst = s->Program->Instructions + s->IP; + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + inst->Opcode = OPCODE_MOV; + inst->DstReg = dst; + inst->DstReg.WriteMask = negatebase[i]; + inst->SrcReg[0] = src; + inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; + inst++; + s->IP++; + } +} + + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "1/2"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +void r500FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r500_fragment_program_code *code = &c->code.r500; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + int n; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + + if (code->const_nr) { + fprintf(stderr, "--------\nConstants:\n"); + for (n = 0; n < code->const_nr; n++) { + fprintf(stderr, "Constant %d: %i[%i]\n", n, + code->constant[n].File, code->constant[n].Index); + } + fprintf(stderr, "--------\n"); + } + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case 0: + case 1: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case 2: + break; + case 3: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h new file mode 100644 index 0000000000..232993f581 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + * Jerome Glisse + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "shader/prog_parameter.h" +#include "shader/prog_instruction.h" + +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_nqssadce.h" + +extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); + +extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); + +extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c new file mode 100644 index 0000000000..237489e119 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -0,0 +1,329 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs + * + * \author Jerome Glisse + * + * \author Corbin Simpson + * + * \todo Depth write, WPOS/FOGC inputs + * + * \todo FogOption + * + */ + +#include "compiler/r500_fragprog.h" + +#include "r300_reg.h" + +#include "compiler/radeon_program_pair.h" + + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r500_fragment_program_code *code = &c->code->code.r500 + +#define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +/** + * Callback to register hardware constants. + */ +static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex) +{ + PROG_CODE; + + for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { + if (code->constant[*hwindex].File == file && + code->constant[*hwindex].Index == idx) + break; + } + + if (*hwindex >= code->const_nr) { + if (*hwindex >= R500_PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } + + code->const_nr++; + code->constant[*hwindex].File = file; + code->constant[*hwindex].Index = idx; + } + + return GL_TRUE; +} + +static GLuint translate_rgb_op(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static GLuint translate_alpha_op(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALPHA_OP_CMP; + case OPCODE_COS: return R500_ALPHA_OP_COS; + case OPCODE_DDX: return R500_ALPHA_OP_MDH; + case OPCODE_DDY: return R500_ALPHA_OP_MDV; + case OPCODE_DP3: return R500_ALPHA_OP_DP; + case OPCODE_DP4: return R500_ALPHA_OP_DP; + case OPCODE_EX2: return R500_ALPHA_OP_EX2; + case OPCODE_FRC: return R500_ALPHA_OP_FRC; + case OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALPHA_OP_MAD; + case OPCODE_MAX: return R500_ALPHA_OP_MAX; + case OPCODE_MIN: return R500_ALPHA_OP_MIN; + case OPCODE_RCP: return R500_ALPHA_OP_RCP; + case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +static GLuint fix_hw_swizzle(GLuint swz) +{ + if (swz == 5) swz = 6; + if (swz == SWIZZLE_NIL) swz = 4; + return swz; +} + +static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +{ + GLuint t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +{ + GLuint t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +static void use_temporary(struct r500_fragment_program_code* code, GLuint index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ + if (!src.Constant) + use_temporary(code, src.Index); + return src.Index | src.Constant << 8; +} + + +/** + * Emit a paired ALU instruction. + */ +static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_alu: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + else + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->code->writes_depth = GL_TRUE; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + return GL_TRUE; +} + +static GLuint translate_strq_swizzle(struct prog_src_register src) +{ + GLuint swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static GLboolean emit_tex(void *data, struct prog_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_tex: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->DstReg.WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + default: + error("emit_tex can't handle opcode %x\n", inst->Opcode); + } + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0]) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + return GL_TRUE; +} + +static const struct radeon_pair_handler pair_handler = { + .EmitConst = emit_const, + .EmitPaired = emit_paired, + .EmitTex = emit_tex, + .MaxHwTemps = 128 +}; + +GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +{ + struct r500_fragment_program_code *code = &compiler->code->code.r500; + + _mesa_bzero(code, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_offset = 0; + code->inst_end = -1; + + if (!radeonPairProgram(compiler->ctx, compiler->program, &pair_handler, compiler)) + return GL_FALSE; + + if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= 511) { + error("Introducing fake OUT: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h new file mode 100644 index 0000000000..e1a691db4f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -0,0 +1,162 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) +#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) + + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + GLuint depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + int length; /**< total # of texture instructions used */ + GLuint inst[R300_PFS_MAX_TEX_INST]; + } tex; + + struct { + int length; /**< total # of ALU instructions used */ + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + } inst[R300_PFS_MAX_ALU_INST]; + } alu; + + struct r300_fragment_program_node node[4]; + int cur_node; + int first_node_has_tex; + + /** + * Remember which program register a given hardware constant + * belongs to. + */ + struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; +}; + + +struct r500_fragment_program_code { + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + GLuint inst4; + GLuint inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_offset; + int inst_end; + + /** + * Remember which program register a given hardware constant + * belongs to. + */ + struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + GLboolean writes_depth; + + /* attribute that we are sending the WPOS in */ + gl_frag_attrib wpos_attr; + /* attribute that we are sending the fog coordinate in */ + gl_frag_attrib fog_attr; +}; + +struct r300_fragment_program_compiler { + GLcontext * ctx; + struct rX00_fragment_program_code *code; + struct gl_program *program; + struct r300_fragment_program_external_state state; + GLboolean is_r500; + GLboolean debug; +}; + +GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c new file mode 100644 index 0000000000..202a8532b6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * "Not-quite SSA" and Dead-Code Elimination. + * + * @note This code uses SWIZZLE_NIL in a source register to indicate that + * the corresponding component is ignored by the corresponding instruction. + */ + +#include "radeon_nqssadce.h" + + +/** + * Return the @ref register_state for the given register (or 0 for untracked + * registers, i.e. constants). + */ +static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_OUTPUT: return &s->Outputs[index]; + case PROGRAM_ADDRESS: return &s->Address; + default: return 0; + } +} + + +/** + * Left multiplication of a register with a swizzle + * + * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. + */ +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) +{ + struct prog_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = NEGATE_NONE; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + + +static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, + struct prog_instruction *inst, GLint src, GLuint sourced) +{ + int i; + GLuint deswz_source = 0; + + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) { + GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); + deswz_source |= 1 << swz; + } else { + inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + } + + if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { + struct prog_dst_register dstreg = inst->DstReg; + dstreg.File = PROGRAM_TEMPORARY; + dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + dstreg.WriteMask = sourced; + + s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); + + inst = s->Program->Instructions + s->IP; + inst->SrcReg[src].File = PROGRAM_TEMPORARY; + inst->SrcReg[src].Index = dstreg.Index; + inst->SrcReg[src].Swizzle = 0; + inst->SrcReg[src].Negate = NEGATE_NONE; + inst->SrcReg[src].Abs = 0; + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) + inst->SrcReg[src].Swizzle |= i << (3*i); + else + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + deswz_source = sourced; + } + + struct register_state *regstate; + + if (inst->SrcReg[src].RelAddr) { + regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); + if (regstate) + regstate->Sourced |= WRITEMASK_X; + } else { + regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + if (regstate) + regstate->Sourced |= deswz_source & 0xf; + } + + return inst; +} + +static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) +{ + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; ++i) + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) + inst->SrcReg[i].Index = newindex; +} + +static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) +{ + GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + int ip; + for(ip = 0; ip < s->IP; ++ip) { + struct prog_instruction* inst = s->Program->Instructions + ip; + if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) + inst->DstReg.Index = newindex; + unalias_srcregs(inst, oldindex, newindex); + } + unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); +} + + +/** + * Handle one instruction. + */ +static void process_instruction(struct nqssadce_state* s) +{ + struct prog_instruction *inst = s->Program->Instructions + s->IP; + + if (inst->Opcode == OPCODE_END) + return; + + if (inst->Opcode != OPCODE_KIL) { + struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); + if (!regstate) { + _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", + inst->DstReg.File, inst->DstReg.Index); + return; + } + + inst->DstReg.WriteMask &= regstate->Sourced; + regstate->Sourced &= ~inst->DstReg.WriteMask; + + if (inst->DstReg.WriteMask == 0) { + _mesa_delete_instructions(s->Program, s->IP, 1); + return; + } + + if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) + unalias_temporary(s, inst->DstReg.Index); + } + + /* Attention: Due to swizzle emulation code, the following + * might change the instruction stream under us, so we have + * to be careful with the inst pointer. */ + switch (inst->Opcode) { + case OPCODE_ARL: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MOV: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + case OPCODE_SGE: + case OPCODE_SLT: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + inst = track_used_srcreg(s, inst, 0, 0x1); + break; + case OPCODE_DP3: + inst = track_used_srcreg(s, inst, 0, 0x7); + inst = track_used_srcreg(s, inst, 1, 0x7); + break; + case OPCODE_DP4: + inst = track_used_srcreg(s, inst, 0, 0xf); + inst = track_used_srcreg(s, inst, 1, 0xf); + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + inst = track_used_srcreg(s, inst, 0, 0xf); + break; + case OPCODE_DST: + inst = track_used_srcreg(s, inst, 0, 0x6); + inst = track_used_srcreg(s, inst, 1, 0xa); + break; + case OPCODE_EXP: + case OPCODE_LOG: + case OPCODE_POW: + inst = track_used_srcreg(s, inst, 0, 0x3); + break; + case OPCODE_LIT: + inst = track_used_srcreg(s, inst, 0, 0xb); + break; + default: + _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); + return; + } +} + +static void calculateInputsOutputs(struct gl_program *p) +{ + struct prog_instruction *inst; + GLuint InputsRead, OutputsWritten; + + inst = p->Instructions; + InputsRead = 0; + OutputsWritten = 0; + while (inst->Opcode != OPCODE_END) + { + int i, num_src_regs; + + num_src_regs = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < num_src_regs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_INPUT) + InputsRead |= 1 << inst->SrcReg[i].Index; + } + + if (inst->DstReg.File == PROGRAM_OUTPUT) + OutputsWritten |= 1 << inst->DstReg.Index; + + ++inst; + } + + p->InputsRead = InputsRead; + p->OutputsWritten = OutputsWritten; +} + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) +{ + struct nqssadce_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Ctx = ctx; + s.Program = p; + s.Descr = descr; + s.Descr->Init(&s); + s.IP = p->NumInstructions; + + while(s.IP > 0) { + s.IP--; + process_instruction(&s); + } + + calculateInputsOutputs(p); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h new file mode 100644 index 0000000000..8626f21c25 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_NQSSADCE_H_ +#define __RADEON_PROGRAM_NQSSADCE_H_ + +#include "radeon_program.h" + + +struct register_state { + /** + * Bitmask indicating which components of the register are sourced + * by later instructions. + */ + GLuint Sourced : 4; +}; + +/** + * Maintain state such as which registers are used, which registers are + * read from, etc. + */ +struct nqssadce_state { + GLcontext *Ctx; + struct gl_program *Program; + struct radeon_nqssadce_descr *Descr; + + /** + * All instructions after this instruction pointer have been dealt with. + */ + int IP; + + /** + * Which registers are read by subsequent instructions? + */ + struct register_state Temps[MAX_PROGRAM_TEMPS]; + struct register_state Outputs[VERT_RESULT_MAX]; + struct register_state Address; +}; + + +/** + * This structure contains a description of the hardware in-so-far as + * it is required for the NqSSA-DCE pass. + */ +struct radeon_nqssadce_descr { + /** + * Fill in which outputs + */ + void (*Init)(struct nqssadce_state *); + + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + */ + GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); + + /** + * Emit (at the current IP) the instruction MOV dst, src; + * The transformation will work recursively on the emitted instruction(s). + */ + void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + + void *Data; +}; + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); + +#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c new file mode 100644 index 0000000000..da5e7aefce --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include "shader/prog_print.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void radeonLocalTransform( + GLcontext *Ctx, + struct gl_program *program, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct radeon_transform_context ctx; + int ip; + + ctx.Ctx = Ctx; + ctx.Program = program; + ctx.OldInstructions = program->Instructions; + ctx.OldNumInstructions = program->NumInstructions; + + program->Instructions = 0; + program->NumInstructions = 0; + + for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { + struct prog_instruction *instr = ctx.OldInstructions + ip; + int i; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(&ctx, instr, t->userData)) + break; + } + + if (i >= num_transformations) { + struct prog_instruction* dest = radeonAppendInstructions(program, 1); + _mesa_copy_instructions(dest, instr, 1); + } + } + + _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); +} + + +static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) +{ + GLuint i; + for (i = 0; i < count; i++) { + const struct prog_instruction *inst = insts + i; + const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); + GLuint k; + + for (k = 0; k < n; k++) { + if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) + used[inst->SrcReg[k].Index] = GL_TRUE; + } + } +} + +GLint radeonFindFreeTemporary(struct radeon_transform_context *t) +{ + GLboolean used[MAX_PROGRAM_TEMPS]; + GLuint i; + + _mesa_memset(used, 0, sizeof(used)); + scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); + scan_instructions(used, t->OldInstructions, t->OldNumInstructions); + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!used[i]) + return i; + } + + return -1; +} + + +/** + * Append the given number of instructions to the program and return a + * pointer to the first new instruction. + */ +struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) +{ + int oldnum = program->NumInstructions; + _mesa_insert_instructions(program, oldnum, count); + return program->Instructions + oldnum; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h new file mode 100644 index 0000000000..88474d43a2 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + + +enum { + CLAUSE_MIXED = 0, + CLAUSE_ALU, + CLAUSE_TEX +}; + +enum { + PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +}; + +enum { + OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ +}; + +#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) +#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) + +static inline GLuint get_swz(GLuint swz, GLuint idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) +{ + GLuint ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +static inline GLuint combine_swizzles(GLuint src, GLuint swz) +{ + GLuint ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; + + return ret; +} + + +/** + * Transformation context that is passed to local transformations. + * + * Care must be taken with some operations during transformation, + * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary + */ +struct radeon_transform_context { + GLcontext *Ctx; + struct gl_program *Program; + struct prog_instruction *OldInstructions; + GLuint OldNumInstructions; +}; + +/** + * A transformation that can be passed to \ref radeonLocalTransform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + GLboolean (*function)( + struct radeon_transform_context*, + struct prog_instruction*, + void*); + void *userData; +}; + +void radeonLocalTransform( + GLcontext* ctx, + struct gl_program *program, + int num_transformations, + struct radeon_program_transformation* transformations); + +/** + * Find a usable free temporary register during program transformation + */ +GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); + +struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c new file mode 100644 index 0000000000..8283723bad --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -0,0 +1,635 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + +#include "shader/prog_parameter.h" + + +static struct prog_instruction *emit1(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg; + return fpi; +} + +static struct prog_instruction *emit2(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + return fpi; +} + +static struct prog_instruction *emit3(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, + struct prog_src_register SrcReg2) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + fpi->SrcReg[2] = SrcReg2; + return fpi; +} + +static struct prog_dst_register dstreg(int file, int index) +{ + struct prog_dst_register dst; + dst.File = file; + dst.Index = index; + dst.WriteMask = WRITEMASK_XYZW; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static struct prog_dst_register dstregtmpmask(int index, int mask) +{ + struct prog_dst_register dst; + dst.File = PROGRAM_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static const struct prog_src_register builtin_zero = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_0000 +}; +static const struct prog_src_register builtin_one = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_1111 +}; +static const struct prog_src_register srcreg_undefined = { + .File = PROGRAM_UNDEFINED, + .Index = 0, + .Swizzle = SWIZZLE_NOOP +}; + +static struct prog_src_register srcreg(int file, int index) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct prog_src_register srcregswz(int file, int index, int swz) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + +static struct prog_src_register absolute(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.Abs = 1; + newreg.Negate = NEGATE_NONE; + return newreg; +} + +static struct prog_src_register negate(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ NEGATE_XYZW; + return newreg; +} + +static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +{ + struct prog_src_register swizzled = reg; + swizzled.Swizzle = MAKE_SWIZZLE4( + x >= 4 ? x : GET_SWZ(reg.Swizzle, x), + y >= 4 ? y : GET_SWZ(reg.Swizzle, y), + z >= 4 ? z : GET_SWZ(reg.Swizzle, z), + w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); + return swizzled; +} + +static struct prog_src_register scalar(struct prog_src_register reg) +{ + return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); +} + +static void transform_ABS(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + struct prog_src_register src = inst->SrcReg[0]; + src.Abs = 1; + src.Negate = NEGATE_NONE; + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); +} + +static void transform_DPH(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ONE << (3 * 3); + emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); +} + +/** + * [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. + */ +static void transform_DST(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), + swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); +} + +static void transform_FLR(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); + emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, + inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + static const GLfloat LitConst[4] = { -127.999999 }; + + GLuint constant; + GLuint constant_swizzle; + GLuint temp; + int needTemporary = 0; + struct prog_src_register srctemp; + + constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); + + if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = radeonFindFreeTemporary(t); + } else { + temp = inst->DstReg.Index; + } + srctemp = srcreg(PROGRAM_TEMPORARY, temp); + + // tmp.x = max(0.0, Src.x); + // tmp.y = max(0.0, Src.y); + // tmp.w = clamp(Src.z, -128+eps, 128-eps); + emit2(t->Program, OPCODE_MAX, 0, + dstregtmpmask(temp, WRITEMASK_XYW), + inst->SrcReg[0], + swizzle(srcreg(PROGRAM_CONSTANT, constant), + SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); + emit2(t->Program, OPCODE_MIN, 0, + dstregtmpmask(temp, WRITEMASK_Z), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + + // tmp.w = Pow(tmp.y, tmp.w) + emit1(t->Program, OPCODE_LG2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit2(t->Program, OPCODE_MUL, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); + emit1(t->Program, OPCODE_EX2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + + // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, + dstregtmpmask(temp, WRITEMASK_Z), + negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + builtin_zero); + + // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, + dstregtmpmask(temp, WRITEMASK_XYW), + swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + + if (needTemporary) + emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); +} + +static void transform_LRP(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, + dstreg(PROGRAM_TEMPORARY, tempreg), + inst->SrcReg[1], negate(inst->SrcReg[2])); + emit3(t->Program, OPCODE_MAD, inst->SaturateMode, + inst->DstReg, + inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); +} + +static void transform_POW(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); + struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); + tempdst.WriteMask = WRITEMASK_W; + tempsrc.Swizzle = SWIZZLE_WWWW; + + emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); + emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); + emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); +} + +static void transform_RSQ(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); +} + +static void transform_SGE(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); +} + +static void transform_SLT(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); +} + +static void transform_SUB(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); +} + +static void transform_SWZ(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); +} + +static void transform_XPD(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. + */ +GLboolean radeonTransformALU(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + switch(inst->Opcode) { + case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; + case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; + case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; + case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} + + +static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) +{ + static const GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.5, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } + }; + int i; + + for(i = 0; i < 2; ++i) { + GLuint swz; + constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); + ASSERT(swz == SWIZZLE_NOOP); + } +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). + * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx(struct radeon_transform_context* t, + struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) +{ + GLuint tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcreg(PROGRAM_CONSTANT, constants[0])); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); + emit3(t->Program, OPCODE_MAD, 0, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + GLuint constants[2]; + GLuint tempreg = radeonFindFreeTemporary(t); + + sincos_constants(t, constants); + + if (inst->Opcode == OPCODE_COS) { + // MAD tmp.x, src, 1/(2*PI), 0.75 + // FRC tmp.x, tmp.x + // MAD tmp.z, tmp.x, 2*PI, -PI + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else if (inst->Opcode == OPCODE_SIN) { + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else { + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + struct prog_dst_register dst = inst->DstReg; + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + constants); + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + constants); + } + + return GL_TRUE; +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; + GLuint temp; + GLuint constant; + GLuint constant_swizzle; + + temp = radeonFindFreeTemporary(t); + constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); + + emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), + srcreg(PROGRAM_TEMPORARY, temp)); + + if (inst->Opcode == OPCODE_COS) { + emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SIN) { + emit1(t->Program, OPCODE_SIN, inst->SaturateMode, + inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = inst->DstReg; + + if (inst->DstReg.WriteMask & WRITEMASK_X) { + moddst.WriteMask = WRITEMASK_X; + emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + if (inst->DstReg.WriteMask & WRITEMASK_Y) { + moddst.WriteMask = WRITEMASK_Y; + emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + } + + return GL_TRUE; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! + */ + +GLboolean radeonTransformDeriv(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) + return GL_FALSE; + + struct prog_src_register B = inst->SrcReg[1]; + + B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, + SWIZZLE_ONE, SWIZZLE_ONE); + B.Negate = NEGATE_XYZW; + + emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, + inst->SrcReg[0], B); + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h new file mode 100644 index 0000000000..b45958115c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +GLboolean radeonTransformALU( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformTrigSimple( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformTrigScale( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformDeriv( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c new file mode 100644 index 0000000000..d6fb474cf2 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -0,0 +1,1004 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Perform temporary register allocation and attempt to pair off instructions + * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction + * vs. ALU instruction scheduling. + */ + +#include "radeon_program_pair.h" + +#include "radeon_common.h" + +#include "shader/prog_print.h" + +#define error(fmt, args...) do { \ + _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + s->Error = GL_TRUE; \ +} while(0) + +struct pair_state_instruction { + GLuint IsTex:1; /**< Is a texture instruction */ + GLuint NeedRGB:1; /**< Needs the RGB ALU */ + GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ + GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + GLuint NumDependencies:5; + + /** + * Next instruction in the linked list of ready instructions. + */ + struct pair_state_instruction *NextReady; + + /** + * Values that this instruction writes + */ + struct reg_value *Values[4]; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + GLuint IP; /**< IP of the instruction that performs this access */ + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * PROGRAM_TEMPORARY. + */ +struct reg_value { + GLuint IP; /**< IP of the instruction that writes this value */ + struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ + + /** + * Unordered linked list of instructions that read from this value. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is calculated during @ref scan_instructions + * and continually decremented during code emission. + * When this count reaches zero, the instruction that writes the @ref Next value + * can be scheduled. + */ + GLuint NumReaders; +}; + +/** + * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register + * to the proper hardware temporary. + */ +struct pair_register_translation { + GLuint Allocated:1; + GLuint HwIndex:8; + GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ + + /** + * Notes the value that is currently contained in each component + * (only used for PROGRAM_TEMPORARY registers). + */ + struct reg_value *Value[4]; +}; + +struct pair_state { + GLcontext *Ctx; + struct gl_program *Program; + const struct radeon_pair_handler *Handler; + GLboolean Error; + GLboolean Debug; + GLboolean Verbose; + void *UserData; + + /** + * Translate Mesa registers to hardware registers + */ + struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; + struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; + + /** + * Derived information about program instructions. + */ + struct pair_state_instruction *Instructions; + + struct { + GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ + } HwTemps[128]; + + /** + * Linked list of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + struct pair_state_instruction *ReadyFullALU; + struct pair_state_instruction *ReadyRGB; + struct pair_state_instruction *ReadyAlpha; + struct pair_state_instruction *ReadyTEX; + + /** + * Pool of @ref reg_value structures for fast allocation. + */ + struct reg_value *ValuePool; + GLuint ValuePoolUsed; + struct reg_value_reader *ReaderPool; + GLuint ReaderPoolUsed; +}; + + +static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_INPUT: return &s->Inputs[index]; + default: return 0; + } +} + +static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) +{ + struct pair_register_translation *t = get_register(s, file, index); + ASSERT(!s->HwTemps[hwindex].RefCount); + ASSERT(!t->Allocated); + s->HwTemps[hwindex].RefCount = t->RefCount; + t->Allocated = 1; + t->HwIndex = hwindex; +} + +static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) +{ + GLuint hwindex; + + struct pair_register_translation *t = get_register(s, file, index); + if (!t) { + _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); + return 0; + } + + if (t->Allocated) + return t->HwIndex; + + for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) + if (!s->HwTemps[hwindex].RefCount) + break; + + if (hwindex >= s->Handler->MaxHwTemps) { + error("Ran out of hardware temporaries"); + return 0; + } + + alloc_hw_reg(s, file, index, hwindex); + return hwindex; +} + + +static void deref_hw_reg(struct pair_state *s, GLuint hwindex) +{ + if (!s->HwTemps[hwindex].RefCount) { + error("Hwindex %i refcount error", hwindex); + return; + } + + s->HwTemps[hwindex].RefCount--; +} + +static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) +{ + pairinst->NextReady = *list; + *list = pairinst; +} + +/** + * The instruction at the given IP has become ready. Link it into the ready + * instructions. + */ +static void instruction_ready(struct pair_state *s, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + + if (s->Verbose) + _mesa_printf("instruction_ready(%i)\n", ip); + + if (pairinst->IsTex) + add_pairinst_to_list(&s->ReadyTEX, pairinst); + else if (!pairinst->NeedAlpha) + add_pairinst_to_list(&s->ReadyRGB, pairinst); + else if (!pairinst->NeedRGB) + add_pairinst_to_list(&s->ReadyAlpha, pairinst); + else + add_pairinst_to_list(&s->ReadyFullALU, pairinst); +} + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) +{ + struct prog_src_register tmp; + + switch(inst->Opcode) { + case OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[1].Negate = NEGATE_NONE; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_MUL: + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct pair_state *s, + struct prog_instruction *inst, struct pair_state_instruction *pairinst) +{ + pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; + pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; + + switch(inst->Opcode) { + case OPCODE_ADD: + case OPCODE_CMP: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MAD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MOV: + case OPCODE_MUL: + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + pairinst->IsTranscendent = 1; + pairinst->NeedAlpha = 1; + break; + case OPCODE_DP4: + pairinst->NeedAlpha = 1; + /* fall through */ + case OPCODE_DP3: + pairinst->NeedRGB = 1; + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + case OPCODE_END: + pairinst->IsTex = 1; + break; + default: + error("Unknown opcode %d\n", inst->Opcode); + break; + } +} + + +/** + * Count which (input, temporary) register is read and written how often, + * and scan the instruction stream to find dependencies. + */ +static void scan_instructions(struct pair_state *s) +{ + struct prog_instruction *inst; + struct pair_state_instruction *pairinst; + GLuint ip; + + for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; + inst->Opcode != OPCODE_END; + ++inst, ++pairinst, ++ip) { + final_rewrite(s, inst); + classify_instruction(s, inst, pairinst); + + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int j; + for(j = 0; j < nsrc; j++) { + struct pair_register_translation *t = + get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); + if (!t) + continue; + + t->RefCount++; + + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + int i; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); + if (swz >= 4) + continue; /* constant or NIL swizzle */ + if (!t->Value[swz]) + continue; /* this is an undefined read */ + + /* Do not add a dependency if this instruction + * also rewrites the value. The code below adds + * a dependency for the DstReg, which is a superset + * of the SrcReg dependency. */ + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == inst->SrcReg[j].Index && + GET_BIT(inst->DstReg.WriteMask, swz)) + continue; + + struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; + pairinst->NumDependencies++; + t->Value[swz]->NumReaders++; + r->IP = ip; + r->Next = t->Value[swz]->Readers; + t->Value[swz]->Readers = r; + } + } + } + + int ndst = _mesa_num_inst_dst_regs(inst->Opcode); + if (ndst) { + struct pair_register_translation *t = + get_register(s, inst->DstReg.File, inst->DstReg.Index); + if (t) { + t->RefCount++; + + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + int j; + for(j = 0; j < 4; ++j) { + if (!GET_BIT(inst->DstReg.WriteMask, j)) + continue; + + struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; + v->IP = ip; + if (t->Value[j]) { + pairinst->NumDependencies++; + t->Value[j]->Next = v; + } + t->Value[j] = v; + pairinst->Values[j] = v; + } + } + } + } + + if (s->Verbose) + _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); + + if (!pairinst->NumDependencies) + instruction_ready(s, ip); + } + + /* Clear the PROGRAM_TEMPORARY state */ + int i, j; + for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { + for(j = 0; j < 4; ++j) + s->Temps[i].Value[j] = 0; + } +} + + +/** + * Reserve hardware temporary registers for the program inputs. + * + * @note This allocation is performed explicitly, because the order of inputs + * is determined by the RS hardware. + */ +static void allocate_input_registers(struct pair_state *s) +{ + GLuint InputsRead = s->Program->InputsRead; + int i; + GLuint hwindex = 0; + + /* Primary colour */ + if (InputsRead & FRAG_BIT_COL0) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); + InputsRead &= ~FRAG_BIT_COL1; + + /* Texcoords */ + for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* Fogcoords treated as a texcoord */ + if (InputsRead & FRAG_BIT_FOGC) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); + InputsRead &= ~FRAG_BIT_FOGC; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; + + /* Anything else */ + if (InputsRead) + error("Don't know how to handle inputs 0x%x\n", InputsRead); +} + + +static void decrement_dependencies(struct pair_state *s, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + ASSERT(pairinst->NumDependencies > 0); + if (!--pairinst->NumDependencies) + instruction_ready(s, ip); +} + +/** + * Update the dependency tracking state based on what the instruction + * at the given IP does. + */ +static void commit_instruction(struct pair_state *s, int ip) +{ + struct prog_instruction *inst = s->Program->Instructions + ip; + struct pair_state_instruction *pairinst = s->Instructions + ip; + + if (s->Verbose) + _mesa_printf("commit_instruction(%i)\n", ip); + + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; + deref_hw_reg(s, t->HwIndex); + + int i; + for(i = 0; i < 4; ++i) { + if (!GET_BIT(inst->DstReg.WriteMask, i)) + continue; + + t->Value[i] = pairinst->Values[i]; + if (t->Value[i]->NumReaders) { + struct reg_value_reader *r; + for(r = pairinst->Values[i]->Readers; r; r = r->Next) + decrement_dependencies(s, r->IP); + } else if (t->Value[i]->Next) { + /* This happens when the only reader writes + * the register at the same time */ + decrement_dependencies(s, t->Value[i]->Next->IP); + } + } + } + + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; i++) { + struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (!t) + continue; + + deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); + + if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) + continue; + + int j; + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz >= 4) + continue; + if (!t->Value[swz]) + continue; + + /* Do not free a dependency if this instruction + * also rewrites the value. See scan_instructions. */ + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == inst->SrcReg[i].Index && + GET_BIT(inst->DstReg.WriteMask, swz)) + continue; + + if (!--t->Value[swz]->NumReaders) { + if (t->Value[swz]->Next) + decrement_dependencies(s, t->Value[swz]->Next->IP); + } + } + } +} + + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + * + * In R500, we don't really know when the result of a texture instruction + * arrives. So allocate all destinations first, to make sure they do not + * arrive early and overwrite a texture coordinate we're going to use later + * in the block. + */ +static void emit_all_tex(struct pair_state *s) +{ + struct pair_state_instruction *readytex; + struct pair_state_instruction *pairinst; + + ASSERT(s->ReadyTEX); + + // Don't let the ready list change under us! + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + + // Allocate destination hardware registers in one block to avoid conflicts. + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + int ip = pairinst - s->Instructions; + struct prog_instruction *inst = s->Program->Instructions + ip; + if (inst->Opcode != OPCODE_KIL) + get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + } + + if (s->Debug) + _mesa_printf(" BEGIN_TEX\n"); + + if (s->Handler->BeginTexBlock) + s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); + + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + int ip = pairinst - s->Instructions; + struct prog_instruction *inst = s->Program->Instructions + ip; + commit_instruction(s, ip); + + if (inst->Opcode != OPCODE_KIL) + inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); + + if (s->Debug) { + _mesa_printf(" "); + _mesa_print_instruction(inst); + fflush(stdout); + } + s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); + } + + if (s->Debug) + _mesa_printf(" END_TEX\n"); +} + + +static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, + struct prog_src_register src, GLboolean rgb, GLboolean alpha) +{ + int candidate = -1; + int candidate_quality = -1; + int i; + + if (!rgb && !alpha) + return 0; + + GLuint constant; + GLuint index; + + if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { + constant = 0; + index = get_hw_reg(s, src.File, src.Index); + } else { + constant = 1; + s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); + } + + for(i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].Constant != constant || + pair->RGB.Src[i].Index != index) + continue; + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].Constant != constant || + pair->Alpha.Src[i].Index != index) + continue; + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } + + if (candidate >= 0) { + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].Constant = constant; + pair->RGB.Src[candidate].Index = index; + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].Constant = constant; + pair->Alpha.Src[candidate].Index = index; + } + } + + return candidate; +} + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = s->Program->Instructions + ip; + + ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); + ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); + + if (pairinst->NeedRGB) { + if (pairinst->IsTranscendent) + pair->RGB.Opcode = OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (pairinst->NeedAlpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + int nargs = _mesa_num_inst_src_regs(inst->Opcode); + int i; + + /* Special case for DDX/DDY (MDH/MDV). */ + if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { + if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) + return GL_FALSE; + else + nargs++; + } + + for(i = 0; i < nargs; ++i) { + int source; + if (pairinst->NeedRGB && !pairinst->IsTranscendent) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + int j; + for(j = 0; j < 3; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + } + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); + } + if (pairinst->NeedAlpha) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); + } + } + + return GL_TRUE; +} + + +/** + * Fill in the destination register information. + * + * This is split from filling in source registers because we want + * to avoid allocating hardware temporaries for destinations until + * we are absolutely certain that we're going to emit a certain + * instruction pairing. + */ +static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = s->Program->Instructions + ip; + + if (inst->DstReg.File == PROGRAM_OUTPUT) { + if (inst->DstReg.Index == FRAG_RESULT_COLOR) { + pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } else { + GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + if (pairinst->NeedRGB) { + pair->RGB.DestIndex = hwindex; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + } + if (pairinst->NeedAlpha) { + pair->Alpha.DestIndex = hwindex; + pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } +} + + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_alu(struct pair_state *s) +{ + struct radeon_pair_instruction pair; + + if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { + int ip; + if (s->ReadyFullALU) { + ip = s->ReadyFullALU - s->Instructions; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + } else if (s->ReadyRGB) { + ip = s->ReadyRGB - s->Instructions; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else { + ip = s->ReadyAlpha - s->Instructions; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } + + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, ip); + fill_dest_into_pair(s, &pair, ip); + commit_instruction(s, ip); + } else { + struct pair_state_instruction **prgb; + struct pair_state_instruction **palpha; + + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + int rgbip = *prgb - s->Instructions; + int alphaip = *palpha - s->Instructions; + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, rgbip); + if (!fill_instruction_into_pair(s, &pair, alphaip)) + continue; + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + fill_dest_into_pair(s, &pair, rgbip); + fill_dest_into_pair(s, &pair, alphaip); + commit_instruction(s, rgbip); + commit_instruction(s, alphaip); + goto success; + } + } + + /* No success in pairing; just take the first RGB instruction */ + int ip = s->ReadyRGB - s->Instructions; + s->ReadyRGB = s->ReadyRGB->NextReady; + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, ip); + fill_dest_into_pair(s, &pair, ip); + commit_instruction(s, ip); + success: ; + } + + if (s->Debug) + radeonPrintPairInstruction(&pair); + + s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); +} + + +GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, + const struct radeon_pair_handler* handler, void *userdata) +{ + struct pair_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Ctx = ctx; + s.Program = _mesa_clone_program(ctx, program); + s.Handler = handler; + s.UserData = userdata; + s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; + s.Verbose = GL_FALSE && s.Debug; + + s.Instructions = (struct pair_state_instruction*)_mesa_calloc( + sizeof(struct pair_state_instruction)*s.Program->NumInstructions); + s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); + s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( + sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); + + if (s.Debug) + _mesa_printf("Emit paired program\n"); + + scan_instructions(&s); + allocate_input_registers(&s); + + while(!s.Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s); + + while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) + emit_alu(&s); + } + + if (s.Debug) + _mesa_printf(" END\n"); + + _mesa_free(s.Instructions); + _mesa_free(s.ValuePool); + _mesa_free(s.ReaderPool); + + _mesa_reference_program(ctx, &s.Program, NULL); + + return !s.Error; +} + + +static void print_pair_src(int i, struct radeon_pair_instruction_source* src) +{ + _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); +} + +static const char* opcode_string(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return "SOP"; + else + return _mesa_opcode_string(opcode); +} + +static int num_pairinst_args(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return 0; + else + return _mesa_num_inst_src_regs(opcode); +} + +static char swizzle_char(GLuint swz) +{ + switch(swz) { + case SWIZZLE_X: return 'x'; + case SWIZZLE_Y: return 'y'; + case SWIZZLE_Z: return 'z'; + case SWIZZLE_W: return 'w'; + case SWIZZLE_ZERO: return '0'; + case SWIZZLE_ONE: return '1'; + case SWIZZLE_NIL: return '_'; + default: return '?'; + } +} + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) +{ + int nargs; + int i; + + _mesa_printf(" RGB: "); + for(i = 0; i < 3; ++i) { + if (inst->RGB.Src[i].Used) + print_pair_src(i, inst->RGB.Src + i); + } + _mesa_printf("\n"); + _mesa_printf(" Alpha:"); + for(i = 0; i < 3; ++i) { + if (inst->Alpha.Src[i].Used) + print_pair_src(i, inst->Alpha.Src + i); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + _mesa_printf(" COLOR.%s%s%s", + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + nargs = num_pairinst_args(inst->RGB.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), + abs); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + _mesa_printf(" COLOR.w"); + if (inst->Alpha.DepthWriteMask) + _mesa_printf(" DEPTH.w"); + nargs = num_pairinst_args(inst->Alpha.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, + swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); + } + _mesa_printf("\n"); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h new file mode 100644 index 0000000000..4624a24629 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_PAIR_H_ +#define __RADEON_PROGRAM_PAIR_H_ + +#include "radeon_program.h" + + +/** + * Represents a paired instruction, as found in R300 and R500 + * fragment programs. + */ +struct radeon_pair_instruction_source { + GLuint Index:8; + GLuint Constant:1; + GLuint Used:1; +}; + +struct radeon_pair_instruction_rgb { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:3; + GLuint OutputWriteMask:3; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:9; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction_alpha { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:1; + GLuint OutputWriteMask:1; + GLuint DepthWriteMask:1; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:3; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction { + struct radeon_pair_instruction_rgb RGB; + struct radeon_pair_instruction_alpha Alpha; +}; + + +/** + * + */ +struct radeon_pair_handler { + /** + * Fill in the proper hardware index for the given constant register. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); + + /** + * Write a paired instruction to the hardware. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); + + /** + * Write a texture instruction to the hardware. + * Register indices have already been rewritten to the allocated + * hardware register numbers. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitTex)(void*, struct prog_instruction*); + + /** + * Called before a block of contiguous, independent texture + * instructions is emitted. + */ + GLboolean (*BeginTexBlock)(void*); + + GLuint MaxHwTemps; +}; + +GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, + const struct radeon_pair_handler*, void *userdata); + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); + +#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index f7af7d4e57..b692f8bf80 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/mtypes.h" #include "shader/prog_instruction.h" +#include "compiler/radeon_compiler.h" struct r300_context; typedef struct r300_context r300ContextRec; @@ -396,9 +397,6 @@ struct r300_hw_state { /* Can be tested with colormat currently. */ #define VSF_MAX_FRAGMENT_TEMPS (14) -#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) -#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) - #define COLOR_IS_RGBA #define TAG(x) r300##x #include "tnl_dd/t_dd_vertex.h" @@ -413,7 +411,7 @@ struct r300_vertex_program { GLuint FogAttr; GLuint WPosAttr; } key; - + struct r300_vertex_shader_hw_code { int length; union { @@ -441,110 +439,6 @@ struct r300_vertex_program_cont { struct r300_vertex_program *progs; }; -#define R300_PFS_MAX_ALU_INST 64 -#define R300_PFS_MAX_TEX_INST 32 -#define R300_PFS_MAX_TEX_INDIRECT 4 -#define R300_PFS_NUM_TEMP_REGS 32 -#define R300_PFS_NUM_CONST_REGS 32 - -#define R500_PFS_MAX_INST 512 -#define R500_PFS_NUM_TEMP_REGS 128 -#define R500_PFS_NUM_CONST_REGS 256 - -struct r300_pfs_compile_state; -struct r500_pfs_compile_state; - -/** - * Stores state that influences the compilation of a fragment program. - */ -struct r300_fragment_program_external_state { - struct { - /** - * If the sampler is used as a shadow sampler, - * this field is: - * 0 - GL_LUMINANCE - * 1 - GL_INTENSITY - * 2 - GL_ALPHA - * depending on the depth texture mode. - */ - GLuint depth_texture_mode : 2; - - /** - * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. if compare function is GL_LEQUAL, this field is 3] - * - * Otherwise, this field is 0. - */ - GLuint texture_compare_func : 3; - } unit[16]; -}; - - -struct r300_fragment_program_node { - int tex_offset; /**< first tex instruction */ - int tex_end; /**< last tex instruction, relative to tex_offset */ - int alu_offset; /**< first ALU instruction */ - int alu_end; /**< last ALU instruction, relative to alu_offset */ - int flags; -}; - -/** - * Stores an R300 fragment program in its compiled-to-hardware form. - */ -struct r300_fragment_program_code { - struct { - int length; /**< total # of texture instructions used */ - GLuint inst[R300_PFS_MAX_TEX_INST]; - } tex; - - struct { - int length; /**< total # of ALU instructions used */ - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - } inst[R300_PFS_MAX_ALU_INST]; - } alu; - - struct r300_fragment_program_node node[4]; - int cur_node; - int first_node_has_tex; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; - - -struct r500_fragment_program_code { - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - GLuint inst4; - GLuint inst5; - } inst[R500_PFS_MAX_INST]; - - int inst_offset; - int inst_end; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; /** * Store everything about a fragment program that is needed @@ -555,22 +449,10 @@ struct r300_fragment_program { GLboolean translated; GLboolean error; - - struct r300_fragment_program_external_state state; - union rX00_fragment_program_code { - struct r300_fragment_program_code r300; - struct r500_fragment_program_code r500; - } code; - - GLboolean writes_depth; - GLuint optimization; - struct r300_fragment_program *next; + struct r300_fragment_program_external_state state; - /* attribute that we are sending the WPOS in */ - gl_frag_attrib wpos_attr; - /* attribute that we are sending the fog coordinate in */ - gl_frag_attrib fog_attr; + struct rX00_fragment_program_code code; }; struct r300_fragment_program_cont { @@ -583,12 +465,6 @@ struct r300_fragment_program_cont { struct r300_fragment_program *progs; }; -struct r300_fragment_program_compiler { - r300ContextPtr r300; - struct r300_fragment_program *fp; - union rX00_fragment_program_code *code; - struct gl_program *program; -}; #define R300_MAX_AOS_ARRAYS 16 @@ -610,8 +486,6 @@ struct r300_swtcl_info { struct r300_vtable { void (* SetupRSUnit)(GLcontext *ctx); void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); - GLboolean (* BuildFragmentProgramHwCode)(struct r300_fragment_program_compiler *compiler); - void (* FragmentProgramDump)(union rX00_fragment_program_code *code); void (* SetupPixelShader)(GLcontext *ctx); }; @@ -669,7 +543,7 @@ struct r300_context { uint32_t s3tc_force_disabled:1; uint32_t stencil_two_side_disabled:1; } options; - + struct r300_swtcl_info swtcl; struct r300_vertex_buffer vbuf; struct r300_index_buffer ind_buf; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c deleted file mode 100644 index 55c1cfe631..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "r300_fragprog.h" - -#include "shader/prog_parameter.h" - -#include "r300_context.h" -#include "r300_fragprog_swizzle.h" - -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} - -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) -{ - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; - struct prog_src_register reg = { 0, }; - - fail_value_tokens[2] = tmu; - reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); - reg.Swizzle = SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * - * \todo If/when r5xx uses the radeon_program architecture, this can probably - * be reused. - */ -GLboolean r300_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) - return GL_FALSE; - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; - if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; - } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); - } - return GL_TRUE; - } - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } - - - /* Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - */ - if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - - int tempreg = radeonFindFreeTemporary(t); - int factor_index; - - tokens[2] = inst.TexSrcUnit; - factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); - - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MUL; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tempreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - tgt->SrcReg[1].File = PROGRAM_STATE_VAR; - tgt->SrcReg[1].Index = factor_index; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tempreg; - } - - if (inst.Opcode != OPCODE_KIL) { - if (inst.DstReg.File != PROGRAM_TEMPORARY || - inst.DstReg.WriteMask != WRITEMASK_XYZW) { - int tempreg = radeonFindFreeTemporary(t); - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } else if (inst.SaturateMode) { - destredirect = GL_TRUE; - } - } - - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; - } - - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SaturateMode = inst.SaturateMode; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; - } - - return GL_TRUE; -} - -/* just some random things... */ -void r300FragmentProgramDump(union rX00_fragment_program_code *c) -{ - struct r300_fragment_program_code *code = &c->r300; - int n, i, j; - static int pc = 0; - - fprintf(stderr, "pc=%d*************************************\n", pc++); - - fprintf(stderr, "Hardware program\n"); - fprintf(stderr, "----------------\n"); - - for (n = 0; n < (code->cur_node + 1); n++) { - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d, flags: %08x\n", n, - code->node[n].alu_offset, - code->node[n].tex_offset, - code->node[n].alu_end, code->node[n].tex_end, - code->node[n].flags); - - if (n > 0 || code->first_node_has_tex) { - fprintf(stderr, " TEX:\n"); - for (i = code->node[n].tex_offset; - i <= code->node[n].tex_offset + code->node[n].tex_end; - ++i) { - const char *instr; - - switch ((code->tex. - inst[i] >> R300_TEX_INST_SHIFT) & - 15) { - case R300_TEX_OP_LD: - instr = "TEX"; - break; - case R300_TEX_OP_KIL: - instr = "KIL"; - break; - case R300_TEX_OP_TXP: - instr = "TXP"; - break; - case R300_TEX_OP_TXB: - instr = "TXB"; - break; - default: - instr = "UNKNOWN"; - } - - fprintf(stderr, - " %s t%i, %c%i, texture[%i] (%08x)\n", - instr, - (code->tex. - inst[i] >> R300_DST_ADDR_SHIFT) & 31, - 't', - (code->tex. - inst[i] >> R300_SRC_ADDR_SHIFT) & 31, - (code->tex. - inst[i] & R300_TEX_ID_MASK) >> - R300_TEX_ID_SHIFT, - code->tex.inst[i]); - } - } - - for (i = code->node[n].alu_offset; - i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { - char srcc[3][10], dstc[20]; - char srca[3][10], dsta[20]; - char argc[3][20]; - char arga[3][20]; - char flags[5], tmp[10]; - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst1 >> (j * 6); - int rega = code->alu.inst[i].inst3 >> (j * 6); - - sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); - sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', rega & 31); - } - - dstc[0] = 0; - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(dstc, "t%i.%s ", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - } - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(tmp, "o%i.%s", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - strcat(dstc, tmp); - } - - dsta[0] = 0; - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { - sprintf(dsta, "t%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { - sprintf(tmp, "o%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - strcat(dsta, tmp); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { - strcat(dsta, "Z"); - } - - fprintf(stderr, - "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" - " w: %3s %3s %3s -> %-20s (%08x)\n", i, - srcc[0], srcc[1], srcc[2], dstc, - code->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, code->alu.inst[i].inst3); - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst0 >> (j * 7); - int rega = code->alu.inst[i].inst2 >> (j * 7); - int d; - char buf[20]; - - d = regc & 31; - if (d < 12) { - switch (d % 4) { - case R300_ALU_ARGC_SRC0C_XYZ: - sprintf(buf, "%s.xyz", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_XXX: - sprintf(buf, "%s.xxx", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_YYY: - sprintf(buf, "%s.yyy", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_ZZZ: - sprintf(buf, "%s.zzz", - srcc[d / 4]); - break; - } - } else if (d < 15) { - sprintf(buf, "%s.www", srca[d - 12]); - } else if (d == 20) { - sprintf(buf, "0.0"); - } else if (d == 21) { - sprintf(buf, "1.0"); - } else if (d == 22) { - sprintf(buf, "0.5"); - } else if (d >= 23 && d < 32) { - d -= 23; - switch (d / 3) { - case 0: - sprintf(buf, "%s.yzx", - srcc[d % 3]); - break; - case 1: - sprintf(buf, "%s.zxy", - srcc[d % 3]); - break; - case 2: - sprintf(buf, "%s.Wzy", - srcc[d % 3]); - break; - } - } else { - sprintf(buf, "%i", d); - } - - sprintf(argc[j], "%s%s%s%s", - (regc & 32) ? "-" : "", - (regc & 64) ? "|" : "", - buf, (regc & 64) ? "|" : ""); - - d = rega & 31; - if (d < 9) { - sprintf(buf, "%s.%c", srcc[d / 3], - 'x' + (char)(d % 3)); - } else if (d < 12) { - sprintf(buf, "%s.w", srca[d - 9]); - } else if (d == 16) { - sprintf(buf, "0.0"); - } else if (d == 17) { - sprintf(buf, "1.0"); - } else if (d == 18) { - sprintf(buf, "0.5"); - } else { - sprintf(buf, "%i", d); - } - - sprintf(arga[j], "%s%s%s%s", - (rega & 32) ? "-" : "", - (rega & 64) ? "|" : "", - buf, (rega & 64) ? "|" : ""); - } - - fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" - " w: %8s %8s %8s op: %08x\n", - argc[0], argc[1], argc[2], - code->alu.inst[i].inst0, arga[0], arga[1], - arga[2], code->alu.inst[i].inst2); - } - } -} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h deleted file mode 100644 index 5ce6f33cee..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/* - * Authors: - * Ben Skeggs - * Jerome Glisse - */ -#ifndef __R300_FRAGPROG_H_ -#define __R300_FRAGPROG_H_ - -#include "shader/program.h" -#include "shader/prog_instruction.h" - -#include "r300_context.h" -#include "radeon_program.h" - -#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 -#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 - -#if 1 - -/** - * Fragment program helper macros - */ - -/* Produce unshifted source selectors */ -#define FP_TMP(idx) (idx) -#define FP_CONST(idx) ((idx) | (1 << 5)) - -/* Produce source/dest selector dword */ -#define FP_SELC_MASK_NO 0 -#define FP_SELC_MASK_X 1 -#define FP_SELC_MASK_Y 2 -#define FP_SELC_MASK_XY 3 -#define FP_SELC_MASK_Z 4 -#define FP_SELC_MASK_XZ 5 -#define FP_SELC_MASK_YZ 6 -#define FP_SELC_MASK_XYZ 7 - -#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTC_SHIFT) | \ - (FP_SELC_MASK_##regmask << 23) | \ - (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_ALU_SRC0C_SHIFT) | \ - ((src1) << R300_ALU_SRC1C_SHIFT) | \ - ((src2) << R300_ALU_SRC2C_SHIFT)) - -#define FP_SELA_MASK_NO 0 -#define FP_SELA_MASK_W 1 - -#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTA_SHIFT) | \ - (FP_SELA_MASK_##regmask << 23) | \ - (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_ALU_SRC0A_SHIFT) | \ - ((src1) << R300_ALU_SRC1A_SHIFT) | \ - ((src2) << R300_ALU_SRC2A_SHIFT)) - -/* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_ALU_ARGC_##source -#define FP_ARGA(source) R300_ALU_ARGA_##source -#define FP_ABS(arg) ((arg) | (1 << 6)) -#define FP_NEG(arg) ((arg) ^ (1 << 5)) - -/* Produce instruction dword */ -#define FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTC_##opcode | \ - ((arg0) << R300_ALU_ARG0C_SHIFT) | \ - ((arg1) << R300_ALU_ARG1C_SHIFT) | \ - ((arg2) << R300_ALU_ARG2C_SHIFT)) - -#define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTA_##opcode | \ - ((arg0) << R300_ALU_ARG0A_SHIFT) | \ - ((arg1) << R300_ALU_ARG1A_SHIFT) | \ - ((arg2) << R300_ALU_ARG2A_SHIFT)) - -#endif - -extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); - -extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); - -extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); - -#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index f5c4c0f4a0..f28162a2b6 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -43,175 +43,13 @@ #include "shader/prog_print.h" #include "r300_state.h" -#include "r300_fragprog.h" -#include "r300_fragprog_swizzle.h" -#include "r500_fragprog.h" -#include "radeon_program.h" -#include "radeon_program_alu.h" +#include "compiler/radeon_program.h" +#include "compiler/radeon_program_alu.h" +#include "compiler/r300_fragprog_swizzle.h" +#include "compiler/r300_fragprog.h" +#include "compiler/r500_fragprog.h" -static void nqssadce_init(struct nqssadce_state* s) -{ - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; -} - -/** - * Transform the program to support fragment.position. - * - * Introduce a small fragment at the start of the program that will be - * the only code that directly reads the FRAG_ATTRIB_WPOS input. - * All other code pieces that reference that input will be rewritten - * to read from a newly allocated temporary. - * - */ -static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) -{ - GLuint InputsRead = compiler->fp->Base->InputsRead; - - if (!(InputsRead & FRAG_BIT_WPOS)) { - compiler->fp->wpos_attr = FRAG_ATTRIB_MAX; - return; - } - - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - - for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) - { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_WPOS); - InputsRead |= 1 << i; - compiler->fp->Base->InputsRead = InputsRead; - compiler->fp->wpos_attr = i; - break; - } - } - - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - i = 0; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } -} - - -/** - * Rewrite fragment.fogcoord to use a texture coordinate slot. - * Note that fogcoord is forced into an X001 pattern, and this enforcement - * is done here. - * - * See also the counterpart rewriting for vertex programs. - */ -static void rewriteFog(struct r300_fragment_program_compiler *compiler) -{ - struct r300_fragment_program *fp = compiler->fp; - GLuint InputsRead; - int i; - - InputsRead = fp->Base->InputsRead; - - if (!(InputsRead & FRAG_BIT_FOGC)) { - fp->fog_attr = FRAG_ATTRIB_MAX; - return; - } - - for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) - { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_FOGC); - InputsRead |= 1 << i; - fp->Base->InputsRead = InputsRead; - fp->fog_attr = i; - break; - } - } - - { - struct prog_instruction *inst; - - inst = compiler->program->Instructions; - while (inst->Opcode != OPCODE_END) { - const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) { - inst->SrcReg[i].Index = fp->fog_attr; - inst->SrcReg[i].Swizzle = combine_swizzles( - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE), - inst->SrcReg[i].Swizzle); - } - } - ++inst; - } - } -} static GLuint build_dtm(GLuint depthmode) { @@ -251,121 +89,23 @@ static void build_state( } } -static void rewrite_depth_out(struct gl_program *prog) -{ - struct prog_instruction *inst; - - for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) { - if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH) - continue; - - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; - } else { - inst->DstReg.WriteMask = 0; - continue; - } - - switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; - } - } -} void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_fragment_program_compiler compiler; - compiler.r300 = r300; - compiler.fp = fp; + compiler.ctx = ctx; compiler.code = &fp->code; + compiler.state = fp->state; compiler.program = fp->Base; + compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE; + compiler.debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; - if (RADEON_DEBUG & DEBUG_PIXEL) { - fflush(stdout); - _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } - - insert_WPOS_trailer(&compiler); - - rewriteFog(&compiler); - - rewrite_depth_out(compiler.program); - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_program_transformation transformations[] = { - { &r500_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 4, transformations); - } else { - struct radeon_program_transformation transformations[] = { - { &r300_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 3, transformations); - } - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } - - if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) + if (!r3xx_compile_fragment_program(&compiler)) fp->error = GL_TRUE; fp->translated = GL_TRUE; - - if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300->vtbl.FragmentProgramDump(&fp->code); } struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c deleted file mode 100644 index b75656e7ee..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * \file - * - * Emit the r300_fragment_program_code that can be understood by the hardware. - * Input is a pre-transformed radeon_program. - * - * \author Ben Skeggs - * - * \author Jerome Glisse - * - * \todo FogOption - */ - -#include "r300_fragprog.h" - -#include "radeon_program_pair.h" -#include "r300_fragprog_swizzle.h" -#include "r300_reg.h" - - -#define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r300_fragment_program_code *code = &c->code->r300 - -#define error(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - } while(0) - - -static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == index) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R300_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = index; - } - - return GL_TRUE; -} - - -/** - * Mark a temporary register as used. - */ -static void use_temporary(struct r300_fragment_program_code *code, GLuint index) -{ - if (index > code->max_temp_idx) - code->max_temp_idx = index; -} - - -static GLuint translate_rgb_opcode(GLuint opcode) -{ - switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTC_CMP; - case OPCODE_DP3: return R300_ALU_OUTC_DP3; - case OPCODE_DP4: return R300_ALU_OUTC_DP4; - case OPCODE_FRC: return R300_ALU_OUTC_FRC; - default: - error("translate_rgb_opcode(%i): Unknown opcode", opcode); - /* fall through */ - case OPCODE_NOP: - /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTC_MAD; - case OPCODE_MAX: return R300_ALU_OUTC_MAX; - case OPCODE_MIN: return R300_ALU_OUTC_MIN; - case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; - } -} - -static GLuint translate_alpha_opcode(GLuint opcode) -{ - switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTA_CMP; - case OPCODE_DP3: return R300_ALU_OUTA_DP4; - case OPCODE_DP4: return R300_ALU_OUTA_DP4; - case OPCODE_EX2: return R300_ALU_OUTA_EX2; - case OPCODE_FRC: return R300_ALU_OUTA_FRC; - case OPCODE_LG2: return R300_ALU_OUTA_LG2; - default: - error("translate_rgb_opcode(%i): Unknown opcode", opcode); - /* fall through */ - case OPCODE_NOP: - /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTA_MAD; - case OPCODE_MAX: return R300_ALU_OUTA_MAX; - case OPCODE_MIN: return R300_ALU_OUTA_MIN; - case OPCODE_RCP: return R300_ALU_OUTA_RCP; - case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; - } -} - -/** - * Emit one paired ALU instruction. - */ -static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) -{ - PROG_CODE; - - if (code->alu.length >= R300_PFS_MAX_ALU_INST) { - error("Too many ALU instructions"); - return GL_FALSE; - } - - int ip = code->alu.length++; - int j; - code->node[code->cur_node].alu_end++; - - code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode); - code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode); - - for(j = 0; j < 3; ++j) { - GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); - if (!inst->RGB.Src[j].Constant) - use_temporary(code, inst->RGB.Src[j].Index); - code->alu.inst[ip].inst1 |= src << (6*j); - - src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); - if (!inst->Alpha.Src[j].Constant) - use_temporary(code, inst->Alpha.Src[j].Index); - code->alu.inst[ip].inst3 |= src << (6*j); - - GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); - arg |= inst->RGB.Arg[j].Abs << 6; - arg |= inst->RGB.Arg[j].Negate << 5; - code->alu.inst[ip].inst0 |= arg << (7*j); - - arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); - arg |= inst->Alpha.Arg[j].Abs << 6; - arg |= inst->Alpha.Arg[j].Negate << 5; - code->alu.inst[ip].inst2 |= arg << (7*j); - } - - if (inst->RGB.Saturate) - code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; - if (inst->Alpha.Saturate) - code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; - - if (inst->RGB.WriteMask) { - use_temporary(code, inst->RGB.DestIndex); - code->alu.inst[ip].inst1 |= - (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | - (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); - } - if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); - code->node[code->cur_node].flags |= R300_RGBA_OUT; - } - - if (inst->Alpha.WriteMask) { - use_temporary(code, inst->Alpha.DestIndex); - code->alu.inst[ip].inst3 |= - (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | - R300_ALU_DSTA_REG; - } - if (inst->Alpha.OutputWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; - code->node[code->cur_node].flags |= R300_RGBA_OUT; - } - if (inst->Alpha.DepthWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; - code->node[code->cur_node].flags |= R300_W_OUT; - c->fp->writes_depth = GL_TRUE; - } - - return GL_TRUE; -} - - -/** - * Finish the current node without advancing to the next one. - */ -static GLboolean finish_node(struct r300_fragment_program_compiler *c) -{ - struct r300_fragment_program_code *code = &c->code->r300; - struct r300_fragment_program_node *node = &code->node[code->cur_node]; - - if (node->alu_end < 0) { - /* Generate a single NOP for this node */ - struct radeon_pair_instruction inst; - _mesa_bzero(&inst, sizeof(inst)); - if (!emit_alu(c, &inst)) - return GL_FALSE; - } - - if (node->tex_end < 0) { - if (code->cur_node == 0) { - node->tex_end = 0; - } else { - error("Node %i has no TEX instructions", code->cur_node); - return GL_FALSE; - } - } else { - if (code->cur_node == 0) - code->first_node_has_tex = 1; - } - - return GL_TRUE; -} - - -/** - * Begin a block of texture instructions. - * Create the necessary indirection. - */ -static GLboolean begin_tex(void* data) -{ - PROG_CODE; - - if (code->cur_node == 0) { - if (code->node[0].alu_end < 0 && - code->node[0].tex_end < 0) - return GL_TRUE; - } - - if (code->cur_node == 3) { - error("Too many texture indirections"); - return GL_FALSE; - } - - if (!finish_node(c)) - return GL_FALSE; - - struct r300_fragment_program_node *node = &code->node[++code->cur_node]; - node->alu_offset = code->alu.length; - node->alu_end = -1; - node->tex_offset = code->tex.length; - node->tex_end = -1; - return GL_TRUE; -} - - -static GLboolean emit_tex(void* data, struct prog_instruction* inst) -{ - PROG_CODE; - - if (code->tex.length >= R300_PFS_MAX_TEX_INST) { - error("Too many TEX instructions"); - return GL_FALSE; - } - - GLuint unit = inst->TexSrcUnit; - GLuint dest = inst->DstReg.Index; - GLuint opcode; - - switch(inst->Opcode) { - case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; - case OPCODE_TEX: opcode = R300_TEX_OP_LD; break; - case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; - case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; - default: - error("Unknown texture opcode %i", inst->Opcode); - return GL_FALSE; - } - - if (inst->Opcode == OPCODE_KIL) { - unit = 0; - dest = 0; - } else { - use_temporary(code, dest); - } - - use_temporary(code, inst->SrcReg[0].Index); - - code->node[code->cur_node].tex_end++; - code->tex.inst[code->tex.length++] = - (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | - (dest << R300_DST_ADDR_SHIFT) | - (unit << R300_TEX_ID_SHIFT) | - (opcode << R300_TEX_INST_SHIFT); - return GL_TRUE; -} - - -static const struct radeon_pair_handler pair_handler = { - .EmitConst = &emit_const, - .EmitPaired = &emit_alu, - .EmitTex = &emit_tex, - .BeginTexBlock = &begin_tex, - .MaxHwTemps = R300_PFS_NUM_TEMP_REGS -}; - -/** - * Final compilation step: Turn the intermediate radeon_program into - * machine-readable instructions. - */ -GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) -{ - struct r300_fragment_program_code *code = &compiler->code->r300; - - _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); - code->node[0].alu_end = -1; - code->node[0].tex_end = -1; - - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; - - if (!finish_node(compiler)) - return GL_FALSE; - - return GL_TRUE; -} - diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c deleted file mode 100644 index fc9d855bce..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * Utilities to deal with the somewhat odd restriction on R300 fragment - * program swizzles. - */ - -#include "r300_fragprog_swizzle.h" - -#include "r300_reg.h" -#include "radeon_nqssadce.h" - -#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) - -struct swizzle_data { - GLuint hash; /**< swizzle value this matches */ - GLuint base; /**< base value for hw swizzle */ - GLuint stride; /**< difference in base between arg0/1/2 */ -}; - -static const struct swizzle_data native_swizzles[] = { - {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4}, - {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4}, - {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4}, - {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4}, - {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1}, - {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1}, - {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, - {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} -}; - -static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); - - -/** - * Find a native RGB swizzle that matches the given swizzle. - * Returns 0 if none found. - */ -static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) -{ - int i, comp; - - for(i = 0; i < num_native_swizzles; ++i) { - const struct swizzle_data* sd = &native_swizzles[i]; - for(comp = 0; comp < 3; ++comp) { - GLuint swz = GET_SWZ(swizzle, comp); - if (swz == SWIZZLE_NIL) - continue; - if (swz != GET_SWZ(sd->hash, comp)) - break; - } - if (comp == 3) - return sd; - } - - return 0; -} - - -/** - * Check whether the given instruction supports the swizzle and negate - * combinations in the given source register. - */ -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) -{ - if (reg.Abs) - reg.Negate = NEGATE_NONE; - - if (opcode == OPCODE_KIL || - opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP) { - int j; - - if (reg.Abs || reg.Negate) - return GL_FALSE; - - for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(reg.Swizzle, j); - if (swz == SWIZZLE_NIL) - continue; - if (swz != j) - return GL_FALSE; - } - - return GL_TRUE; - } - - GLuint relevant = 0; - int j; - - for(j = 0; j < 3; ++j) - if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) - relevant |= 1 << j; - - if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; - - if (!lookup_native_swizzle(reg.Swizzle)) - return GL_FALSE; - - return GL_TRUE; -} - - -/** - * Generate MOV dst, src using only native swizzles. - */ -void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) -{ - if (src.Abs) - src.Negate = NEGATE_NONE; - - while(dst.WriteMask) { - const struct swizzle_data *best_swizzle = 0; - GLuint best_matchcount = 0; - GLuint best_matchmask = 0; - int i, comp; - - for(i = 0; i < num_native_swizzles; ++i) { - const struct swizzle_data *sd = &native_swizzles[i]; - GLuint matchcount = 0; - GLuint matchmask = 0; - for(comp = 0; comp < 3; ++comp) { - if (!GET_BIT(dst.WriteMask, comp)) - continue; - GLuint swz = GET_SWZ(src.Swizzle, comp); - if (swz == SWIZZLE_NIL) - continue; - if (swz == GET_SWZ(sd->hash, comp)) { - /* check if the negate bit of current component - * is the same for already matched components */ - if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) - continue; - - matchcount++; - matchmask |= 1 << comp; - } - } - if (matchcount > best_matchcount) { - best_swizzle = sd; - best_matchcount = matchcount; - best_matchmask = matchmask; - if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) - break; - } - } - - struct prog_instruction *inst; - - _mesa_insert_instructions(s->Program, s->IP, 1); - inst = s->Program->Instructions + s->IP++; - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; - /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ - - dst.WriteMask &= ~inst->DstReg.WriteMask; - } -} - - -/** - * Translate an RGB (XYZ) swizzle into the hardware code for the given - * instruction source. - */ -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) -{ - const struct swizzle_data* sd = lookup_native_swizzle(swizzle); - - if (!sd) { - _mesa_printf("Not a native swizzle: %08x\n", swizzle); - return 0; - } - - return sd->base + src*sd->stride; -} - - -/** - * Translate an Alpha (W) swizzle into the hardware code for the given - * instruction source. - */ -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) -{ - if (swizzle < 3) - return swizzle + 3*src; - - switch(swizzle) { - case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; - case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; - case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; - default: return R300_ALU_ARGA_ONE; - } -} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h deleted file mode 100644 index 231bf4eef5..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __R300_FRAGPROG_SWIZZLE_H_ -#define __R300_FRAGPROG_SWIZZLE_H_ - -#include "main/glheader.h" -#include "shader/prog_instruction.h" - -struct nqssadce_state; - -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); -void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); - -#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index ddabd53992..2fa626bab2 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -55,7 +55,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_vertprog.h" #include "radeon_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_context.h" #include "vblank.h" @@ -66,6 +65,66 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define CLEARBUFFER_DEPTH 0x2 #define CLEARBUFFER_STENCIL 0x4 +#if 1 + +/** + * Fragment program helper macros + */ + +/* Produce unshifted source selectors */ +#define FP_TMP(idx) (idx) +#define FP_CONST(idx) ((idx) | (1 << 5)) + +/* Produce source/dest selector dword */ +#define FP_SELC_MASK_NO 0 +#define FP_SELC_MASK_X 1 +#define FP_SELC_MASK_Y 2 +#define FP_SELC_MASK_XY 3 +#define FP_SELC_MASK_Z 4 +#define FP_SELC_MASK_XZ 5 +#define FP_SELC_MASK_YZ 6 +#define FP_SELC_MASK_XYZ 7 + +#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTC_SHIFT) | \ + (FP_SELC_MASK_##regmask << 23) | \ + (FP_SELC_MASK_##outmask << 26) | \ + ((src0) << R300_ALU_SRC0C_SHIFT) | \ + ((src1) << R300_ALU_SRC1C_SHIFT) | \ + ((src2) << R300_ALU_SRC2C_SHIFT)) + +#define FP_SELA_MASK_NO 0 +#define FP_SELA_MASK_W 1 + +#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTA_SHIFT) | \ + (FP_SELA_MASK_##regmask << 23) | \ + (FP_SELA_MASK_##outmask << 24) | \ + ((src0) << R300_ALU_SRC0A_SHIFT) | \ + ((src1) << R300_ALU_SRC1A_SHIFT) | \ + ((src2) << R300_ALU_SRC2A_SHIFT)) + +/* Produce unshifted argument selectors */ +#define FP_ARGC(source) R300_ALU_ARGC_##source +#define FP_ARGA(source) R300_ALU_ARGA_##source +#define FP_ABS(arg) ((arg) | (1 << 6)) +#define FP_NEG(arg) ((arg) ^ (1 << 5)) + +/* Produce instruction dword */ +#define FP_INSTRC(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTC_##opcode | \ + ((arg0) << R300_ALU_ARG0C_SHIFT) | \ + ((arg1) << R300_ALU_ARG1C_SHIFT) | \ + ((arg2) << R300_ALU_ARG2C_SHIFT)) + +#define FP_INSTRA(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTA_##opcode | \ + ((arg0) << R300_ALU_ARG0A_SHIFT) | \ + ((arg1) << R300_ALU_ARG1A_SHIFT) | \ + ((arg2) << R300_ALU_ARG2A_SHIFT)) + +#endif + static void r300EmitClearState(GLcontext * ctx); static void r300ClearBuffer(r300ContextPtr r300, int flags, @@ -546,7 +605,7 @@ static int r300KernelClear(GLcontext *ctx, GLuint flags) /* Make sure it fits there. */ radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); - + if (flags & BUFFER_BIT_COLOR0) { rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 12fbf281d9..62a03281ca 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -62,8 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_emit.h" #include "r300_tex.h" #include "r300_fragprog_common.h" -#include "r300_fragprog.h" -#include "r500_fragprog.h" #include "r300_render.h" #include "r300_vertprog.h" @@ -458,7 +456,7 @@ static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth; + return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth; } static void r300SetEarlyZState(GLcontext * ctx) @@ -1230,7 +1228,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program_code *code = &r300->selected_fp->code.r300; + struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300; R300_STATECHANGE(r300, fpt); @@ -1272,7 +1270,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r500_fragment_program_code *code = &r300->selected_fp->code.r500; + struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -2063,7 +2061,7 @@ static void r300SetupPixelShader(GLcontext *ctx) struct r300_fragment_program_code *code; int i, k; - code = &fp->code.r300; + code = &fp->code.code.r300; R300_STATECHANGE(rmesa, fpi[0]); R300_STATECHANGE(rmesa, fpi[1]); @@ -2137,7 +2135,7 @@ static void r500SetupPixelShader(GLcontext *ctx) ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; - code = &fp->code.r500; + code = &fp->code.code.r500; R300_STATECHANGE(rmesa, fp); rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; @@ -2345,13 +2343,9 @@ void r300InitShaderFunctions(r300ContextPtr r300) r300->vtbl.SetupRSUnit = r500SetupRSUnit; r300->vtbl.SetupPixelShader = r500SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r500BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; } else { r300->vtbl.SetupRSUnit = r300SetupRSUnit; r300->vtbl.SetupPixelShader = r300SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r300BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; } } diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 56ed519cf4..a7e8e71149 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -150,16 +150,16 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0); } - if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { - int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; + if (rmesa->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0; VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); } - if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { - int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; + if (rmesa->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0; VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index de32013032..ab5ca4322e 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -40,7 +40,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "tnl/tnl.h" -#include "radeon_nqssadce.h" +#include "compiler/radeon_nqssadce.h" #include "r300_context.h" #include "r300_state.h" @@ -1558,12 +1558,12 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, _mesa_insert_mvp_code(ctx, vp->Base); } - if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { - pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0); + if (r300->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) { + pos_as_texcoord(&vp->Base->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0); } - if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { - fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0); + if (r300->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) { + fog_as_texcoord(&vp->Base->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0); } addArtificialOutputs(ctx, prog); @@ -1640,8 +1640,8 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; wanted_key.FpReads = r300->selected_fp->Base->InputsRead; - wanted_key.FogAttr = r300->selected_fp->fog_attr; - wanted_key.WPosAttr = r300->selected_fp->wpos_attr; + wanted_key.FogAttr = r300->selected_fp->code.fog_attr; + wanted_key.WPosAttr = r300->selected_fp->code.wpos_attr; for (vp = vpc->progs; vp; vp = vp->next) { if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c deleted file mode 100644 index 4d58cf2162..0000000000 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "r500_fragprog.h" - -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} - -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) -{ - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; - struct prog_src_register reg = { 0, }; - - fail_value_tokens[2] = tmu; - reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); - reg.Swizzle = SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * - */ -GLboolean r500_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) - return GL_FALSE; - - /* ARB_shadow & EXT_shadow_funcs */ - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; - if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; - } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); - } - return GL_TRUE; - } - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) { - int tempreg = radeonFindFreeTemporary(t); - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } - - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; - } - - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; - } - - return GL_TRUE; -} - -GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) -{ - GLuint relevant; - int i; - - if (opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP || - opcode == OPCODE_KIL) { - if (reg.Abs) - return GL_FALSE; - - if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) - return GL_FALSE; - - if (reg.Negate) - reg.Negate ^= NEGATE_XYZW; - - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz == SWIZZLE_NIL) { - reg.Negate &= ~(1 << i); - continue; - } - if (swz >= 4) - return GL_FALSE; - } - - if (reg.Negate) - return GL_FALSE; - - return GL_TRUE; - } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { - /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; - * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) - return GL_TRUE; - - return GL_FALSE; - } else { - /* ALU instructions support almost everything */ - if (reg.Abs) - return GL_TRUE; - - relevant = 0; - for(i = 0; i < 3; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) - relevant |= 1 << i; - } - if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; - - return GL_TRUE; - } -} - -/** - * Implement a MOV with a potentially non-native swizzle. - * - * The only thing we *cannot* do in an ALU instruction is per-component - * negation. Therefore, we split the MOV into two instructions when necessary. - */ -void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) -{ - struct prog_instruction *inst; - GLuint negatebase[2] = { 0, 0 }; - int i; - - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(src.Swizzle, i); - if (swz == SWIZZLE_NIL) - continue; - negatebase[GET_BIT(src.Negate, i)] |= 1 << i; - } - - _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); - inst = s->Program->Instructions + s->IP; - - for(i = 0; i <= 1; ++i) { - if (!negatebase[i]) - continue; - - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask = negatebase[i]; - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; - inst++; - s->IP++; - } -} - - -static char *toswiz(int swiz_val) { - switch(swiz_val) { - case 0: return "R"; - case 1: return "G"; - case 2: return "B"; - case 3: return "A"; - case 4: return "0"; - case 5: return "1/2"; - case 6: return "1"; - case 7: return "U"; - } - return NULL; -} - -static char *toop(int op_val) -{ - char *str = NULL; - switch (op_val) { - case 0: str = "MAD"; break; - case 1: str = "DP3"; break; - case 2: str = "DP4"; break; - case 3: str = "D2A"; break; - case 4: str = "MIN"; break; - case 5: str = "MAX"; break; - case 6: str = "Reserved"; break; - case 7: str = "CND"; break; - case 8: str = "CMP"; break; - case 9: str = "FRC"; break; - case 10: str = "SOP"; break; - case 11: str = "MDH"; break; - case 12: str = "MDV"; break; - } - return str; -} - -static char *to_alpha_op(int op_val) -{ - char *str = NULL; - switch (op_val) { - case 0: str = "MAD"; break; - case 1: str = "DP"; break; - case 2: str = "MIN"; break; - case 3: str = "MAX"; break; - case 4: str = "Reserved"; break; - case 5: str = "CND"; break; - case 6: str = "CMP"; break; - case 7: str = "FRC"; break; - case 8: str = "EX2"; break; - case 9: str = "LN2"; break; - case 10: str = "RCP"; break; - case 11: str = "RSQ"; break; - case 12: str = "SIN"; break; - case 13: str = "COS"; break; - case 14: str = "MDH"; break; - case 15: str = "MDV"; break; - } - return str; -} - -static char *to_mask(int val) -{ - char *str = NULL; - switch(val) { - case 0: str = "NONE"; break; - case 1: str = "R"; break; - case 2: str = "G"; break; - case 3: str = "RG"; break; - case 4: str = "B"; break; - case 5: str = "RB"; break; - case 6: str = "GB"; break; - case 7: str = "RGB"; break; - case 8: str = "A"; break; - case 9: str = "AR"; break; - case 10: str = "AG"; break; - case 11: str = "ARG"; break; - case 12: str = "AB"; break; - case 13: str = "ARB"; break; - case 14: str = "AGB"; break; - case 15: str = "ARGB"; break; - } - return str; -} - -static char *to_texop(int val) -{ - switch(val) { - case 0: return "NOP"; - case 1: return "LD"; - case 2: return "TEXKILL"; - case 3: return "PROJ"; - case 4: return "LODBIAS"; - case 5: return "LOD"; - case 6: return "DXDY"; - } - return NULL; -} - -void r500FragmentProgramDump(union rX00_fragment_program_code *c) -{ - struct r500_fragment_program_code *code = &c->r500; - fprintf(stderr, "R500 Fragment Program:\n--------\n"); - - int n; - uint32_t inst; - uint32_t inst0; - char *str = NULL; - - if (code->const_nr) { - fprintf(stderr, "--------\nConstants:\n"); - for (n = 0; n < code->const_nr; n++) { - fprintf(stderr, "Constant %d: %i[%i]\n", n, - code->constant[n].File, code->constant[n].Index); - } - fprintf(stderr, "--------\n"); - } - - for (n = 0; n < code->inst_end+1; n++) { - inst0 = inst = code->inst[n].inst0; - fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); - switch(inst & 0x3) { - case R500_INST_TYPE_ALU: str = "ALU"; break; - case R500_INST_TYPE_OUT: str = "OUT"; break; - case R500_INST_TYPE_FC: str = "FC"; break; - case R500_INST_TYPE_TEX: str = "TEX"; break; - }; - fprintf(stderr,"%s %s %s %s %s ", str, - inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", - inst & R500_INST_LAST ? "LAST" : "", - inst & R500_INST_NOP ? "NOP" : "", - inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); - fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), - to_mask((inst >> 15) & 0xf)); - - switch(inst0 & 0x3) { - case 0: - case 1: - fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); - inst = code->inst[n].inst1; - - fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1<<8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', - (inst >> 30)); - - fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); - inst = code->inst[n].inst2; - fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1<<8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', - (inst >> 30)); - fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); - inst = code->inst[n].inst3; - fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", - (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), - (inst >> 11) & 0x3, - (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), - (inst >> 24) & 0x3); - - - fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); - inst = code->inst[n].inst4; - fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), - (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, - (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, - (inst >> 31) & 0x1); - - fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); - inst = code->inst[n].inst5; - fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), - (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), - (inst >> 23) & 0x3, - (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); - break; - case 2: - break; - case 3: - inst = code->inst[n].inst1; - fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, - to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", - (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); - inst = code->inst[n].inst2; - fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, - inst & 127, inst & (1<<7) ? "(rel)" : "", - toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), - toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), - (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", - toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), - toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); - - fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); - break; - } - fprintf(stderr,"\n"); - } - -} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h deleted file mode 100644 index 1179bf6607..0000000000 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/* - * Authors: - * Ben Skeggs - * Jerome Glisse - */ -#ifndef __R500_FRAGPROG_H_ -#define __R500_FRAGPROG_H_ - -#include "shader/prog_parameter.h" -#include "shader/prog_instruction.h" - -#include "r300_context.h" -#include "radeon_nqssadce.h" - -extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); - -extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); - -extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); - -extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); - -extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); - -#endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c deleted file mode 100644 index 30f4514897..0000000000 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * Copyright 2008 Corbin Simpson - * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * \file - * - * \author Ben Skeggs - * - * \author Jerome Glisse - * - * \author Corbin Simpson - * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - * - */ - -#include "r500_fragprog.h" - -#include "radeon_program_pair.h" - - -#define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r500_fragment_program_code *code = &c->code->r500 - -#define error(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - } while(0) - - -/** - * Callback to register hardware constants. - */ -static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == idx) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R500_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = idx; - } - - return GL_TRUE; -} - -static GLuint translate_rgb_op(GLuint opcode) -{ - switch(opcode) { - case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; - case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; - case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; - case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; - case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; - case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; - default: - error("translate_rgb_op(%d): unknown opcode\n", opcode); - /* fall through */ - case OPCODE_NOP: - /* fall through */ - case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; - case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; - case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; - case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; - } -} - -static GLuint translate_alpha_op(GLuint opcode) -{ - switch(opcode) { - case OPCODE_CMP: return R500_ALPHA_OP_CMP; - case OPCODE_COS: return R500_ALPHA_OP_COS; - case OPCODE_DDX: return R500_ALPHA_OP_MDH; - case OPCODE_DDY: return R500_ALPHA_OP_MDV; - case OPCODE_DP3: return R500_ALPHA_OP_DP; - case OPCODE_DP4: return R500_ALPHA_OP_DP; - case OPCODE_EX2: return R500_ALPHA_OP_EX2; - case OPCODE_FRC: return R500_ALPHA_OP_FRC; - case OPCODE_LG2: return R500_ALPHA_OP_LN2; - default: - error("translate_alpha_op(%d): unknown opcode\n", opcode); - /* fall through */ - case OPCODE_NOP: - /* fall through */ - case OPCODE_MAD: return R500_ALPHA_OP_MAD; - case OPCODE_MAX: return R500_ALPHA_OP_MAX; - case OPCODE_MIN: return R500_ALPHA_OP_MIN; - case OPCODE_RCP: return R500_ALPHA_OP_RCP; - case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; - case OPCODE_SIN: return R500_ALPHA_OP_SIN; - } -} - -static GLuint fix_hw_swizzle(GLuint swz) -{ - if (swz == 5) swz = 6; - if (swz == SWIZZLE_NIL) swz = 4; - return swz; -} - -static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) -{ - GLuint t = inst->RGB.Arg[arg].Source; - int comp; - t |= inst->RGB.Arg[arg].Negate << 11; - t |= inst->RGB.Arg[arg].Abs << 12; - - for(comp = 0; comp < 3; ++comp) - t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); - - return t; -} - -static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) -{ - GLuint t = inst->Alpha.Arg[i].Source; - t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; - t |= inst->Alpha.Arg[i].Negate << 5; - t |= inst->Alpha.Arg[i].Abs << 6; - return t; -} - -static void use_temporary(struct r500_fragment_program_code* code, GLuint index) -{ - if (index > code->max_temp_idx) - code->max_temp_idx = index; -} - -static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) -{ - if (!src.Constant) - use_temporary(code, src.Index); - return src.Index | src.Constant << 8; -} - - -/** - * Emit a paired ALU instruction. - */ -static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) -{ - PROG_CODE; - - if (code->inst_end >= 511) { - error("emit_alu: Too many instructions"); - return GL_FALSE; - } - - int ip = ++code->inst_end; - - code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode); - code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode); - - if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) - code->inst[ip].inst0 = R500_INST_TYPE_OUT; - else - code->inst[ip].inst0 = R500_INST_TYPE_ALU; - code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; - - code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); - code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); - if (inst->Alpha.DepthWriteMask) { - code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->fp->writes_depth = GL_TRUE; - } - - code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); - code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); - use_temporary(code, inst->Alpha.DestIndex); - use_temporary(code, inst->RGB.DestIndex); - - if (inst->RGB.Saturate) - code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; - if (inst->Alpha.Saturate) - code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; - - code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); - code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); - code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); - - code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); - code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); - code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); - - code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; - code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; - code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; - - code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; - code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; - code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - - return GL_TRUE; -} - -static GLuint translate_strq_swizzle(struct prog_src_register src) -{ - GLuint swiz = 0; - int i; - for (i = 0; i < 4; i++) - swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2; - return swiz; -} - -/** - * Emit a single TEX instruction - */ -static GLboolean emit_tex(void *data, struct prog_instruction *inst) -{ - PROG_CODE; - - if (code->inst_end >= 511) { - error("emit_tex: Too many instructions"); - return GL_FALSE; - } - - int ip = ++code->inst_end; - - code->inst[ip].inst0 = R500_INST_TYPE_TEX - | (inst->DstReg.WriteMask << 11) - | R500_INST_TEX_SEM_WAIT; - code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) - | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - - if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) - code->inst[ip].inst1 |= R500_TEX_UNSCALED; - - switch (inst->Opcode) { - case OPCODE_KIL: - code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; - break; - case OPCODE_TEX: - code->inst[ip].inst1 |= R500_TEX_INST_LD; - break; - case OPCODE_TXB: - code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; - break; - case OPCODE_TXP: - code->inst[ip].inst1 |= R500_TEX_INST_PROJ; - break; - default: - error("emit_tex can't handle opcode %x\n", inst->Opcode); - } - - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) - | (translate_strq_swizzle(inst->SrcReg[0]) << 8) - | R500_TEX_DST_ADDR(inst->DstReg.Index) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - - return GL_TRUE; -} - -static const struct radeon_pair_handler pair_handler = { - .EmitConst = emit_const, - .EmitPaired = emit_paired, - .EmitTex = emit_tex, - .MaxHwTemps = 128 -}; - -GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) -{ - struct r500_fragment_program_code *code = &compiler->code->r500; - - _mesa_bzero(code, sizeof(*code)); - code->max_temp_idx = 1; - code->inst_offset = 0; - code->inst_end = -1; - - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; - - if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { - /* This may happen when dead-code elimination is disabled or - * when most of the fragment program logic is leading to a KIL */ - if (code->inst_end >= 511) { - error("Introducing fake OUT: Too many instructions"); - return GL_FALSE; - } - - int ip = ++code->inst_end; - code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; - } - - return GL_TRUE; -} diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c deleted file mode 100644 index 202a8532b6..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * "Not-quite SSA" and Dead-Code Elimination. - * - * @note This code uses SWIZZLE_NIL in a source register to indicate that - * the corresponding component is ignored by the corresponding instruction. - */ - -#include "radeon_nqssadce.h" - - -/** - * Return the @ref register_state for the given register (or 0 for untracked - * registers, i.e. constants). - */ -static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_OUTPUT: return &s->Outputs[index]; - case PROGRAM_ADDRESS: return &s->Address; - default: return 0; - } -} - - -/** - * Left multiplication of a register with a swizzle - * - * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. - */ -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) -{ - struct prog_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.Negate = NEGATE_NONE; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; -} - - -static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, - struct prog_instruction *inst, GLint src, GLuint sourced) -{ - int i; - GLuint deswz_source = 0; - - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) { - GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); - deswz_source |= 1 << swz; - } else { - inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - } - - if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { - struct prog_dst_register dstreg = inst->DstReg; - dstreg.File = PROGRAM_TEMPORARY; - dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); - dstreg.WriteMask = sourced; - - s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - - inst = s->Program->Instructions + s->IP; - inst->SrcReg[src].File = PROGRAM_TEMPORARY; - inst->SrcReg[src].Index = dstreg.Index; - inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].Negate = NEGATE_NONE; - inst->SrcReg[src].Abs = 0; - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) - inst->SrcReg[src].Swizzle |= i << (3*i); - else - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - deswz_source = sourced; - } - - struct register_state *regstate; - - if (inst->SrcReg[src].RelAddr) { - regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); - if (regstate) - regstate->Sourced |= WRITEMASK_X; - } else { - regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); - if (regstate) - regstate->Sourced |= deswz_source & 0xf; - } - - return inst; -} - -static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) -{ - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int i; - for(i = 0; i < nsrc; ++i) - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) - inst->SrcReg[i].Index = newindex; -} - -static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) -{ - GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); - int ip; - for(ip = 0; ip < s->IP; ++ip) { - struct prog_instruction* inst = s->Program->Instructions + ip; - if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) - inst->DstReg.Index = newindex; - unalias_srcregs(inst, oldindex, newindex); - } - unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); -} - - -/** - * Handle one instruction. - */ -static void process_instruction(struct nqssadce_state* s) -{ - struct prog_instruction *inst = s->Program->Instructions + s->IP; - - if (inst->Opcode == OPCODE_END) - return; - - if (inst->Opcode != OPCODE_KIL) { - struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); - if (!regstate) { - _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", - inst->DstReg.File, inst->DstReg.Index); - return; - } - - inst->DstReg.WriteMask &= regstate->Sourced; - regstate->Sourced &= ~inst->DstReg.WriteMask; - - if (inst->DstReg.WriteMask == 0) { - _mesa_delete_instructions(s->Program, s->IP, 1); - return; - } - - if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) - unalias_temporary(s, inst->DstReg.Index); - } - - /* Attention: Due to swizzle emulation code, the following - * might change the instruction stream under us, so we have - * to be careful with the inst pointer. */ - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MOV: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - case OPCODE_SGE: - case OPCODE_SLT: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - inst = track_used_srcreg(s, inst, 0, 0x1); - break; - case OPCODE_DP3: - inst = track_used_srcreg(s, inst, 0, 0x7); - inst = track_used_srcreg(s, inst, 1, 0x7); - break; - case OPCODE_DP4: - inst = track_used_srcreg(s, inst, 0, 0xf); - inst = track_used_srcreg(s, inst, 1, 0xf); - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - inst = track_used_srcreg(s, inst, 0, 0xf); - break; - case OPCODE_DST: - inst = track_used_srcreg(s, inst, 0, 0x6); - inst = track_used_srcreg(s, inst, 1, 0xa); - break; - case OPCODE_EXP: - case OPCODE_LOG: - case OPCODE_POW: - inst = track_used_srcreg(s, inst, 0, 0x3); - break; - case OPCODE_LIT: - inst = track_used_srcreg(s, inst, 0, 0xb); - break; - default: - _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); - return; - } -} - -static void calculateInputsOutputs(struct gl_program *p) -{ - struct prog_instruction *inst; - GLuint InputsRead, OutputsWritten; - - inst = p->Instructions; - InputsRead = 0; - OutputsWritten = 0; - while (inst->Opcode != OPCODE_END) - { - int i, num_src_regs; - - num_src_regs = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < num_src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT) - InputsRead |= 1 << inst->SrcReg[i].Index; - } - - if (inst->DstReg.File == PROGRAM_OUTPUT) - OutputsWritten |= 1 << inst->DstReg.Index; - - ++inst; - } - - p->InputsRead = InputsRead; - p->OutputsWritten = OutputsWritten; -} - -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) -{ - struct nqssadce_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = p; - s.Descr = descr; - s.Descr->Init(&s); - s.IP = p->NumInstructions; - - while(s.IP > 0) { - s.IP--; - process_instruction(&s); - } - - calculateInputsOutputs(p); -} diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h deleted file mode 100644 index 8626f21c25..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_NQSSADCE_H_ -#define __RADEON_PROGRAM_NQSSADCE_H_ - -#include "radeon_program.h" - - -struct register_state { - /** - * Bitmask indicating which components of the register are sourced - * by later instructions. - */ - GLuint Sourced : 4; -}; - -/** - * Maintain state such as which registers are used, which registers are - * read from, etc. - */ -struct nqssadce_state { - GLcontext *Ctx; - struct gl_program *Program; - struct radeon_nqssadce_descr *Descr; - - /** - * All instructions after this instruction pointer have been dealt with. - */ - int IP; - - /** - * Which registers are read by subsequent instructions? - */ - struct register_state Temps[MAX_PROGRAM_TEMPS]; - struct register_state Outputs[VERT_RESULT_MAX]; - struct register_state Address; -}; - - -/** - * This structure contains a description of the hardware in-so-far as - * it is required for the NqSSA-DCE pass. - */ -struct radeon_nqssadce_descr { - /** - * Fill in which outputs - */ - void (*Init)(struct nqssadce_state *); - - /** - * Check whether the given swizzle, absolute and negate combination - * can be implemented natively by the hardware for this opcode. - */ - GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); - - /** - * Emit (at the current IP) the instruction MOV dst, src; - * The transformation will work recursively on the emitted instruction(s). - */ - void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - - void *Data; -}; - -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); - -#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c deleted file mode 100644 index da5e7aefce..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program.h" - -#include "shader/prog_print.h" - - -/** - * Transform the given clause in the following way: - * 1. Replace it with an empty clause - * 2. For every instruction in the original clause, try the given - * transformations in order. - * 3. If one of the transformations returns GL_TRUE, assume that it - * has emitted the appropriate instruction(s) into the new clause; - * otherwise, copy the instruction verbatim. - * - * \note The transformation is currently not recursive; in other words, - * instructions emitted by transformations are not transformed. - * - * \note The transform is called 'local' because it can only look at - * one instruction at a time. - */ -void radeonLocalTransform( - GLcontext *Ctx, - struct gl_program *program, - int num_transformations, - struct radeon_program_transformation* transformations) -{ - struct radeon_transform_context ctx; - int ip; - - ctx.Ctx = Ctx; - ctx.Program = program; - ctx.OldInstructions = program->Instructions; - ctx.OldNumInstructions = program->NumInstructions; - - program->Instructions = 0; - program->NumInstructions = 0; - - for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { - struct prog_instruction *instr = ctx.OldInstructions + ip; - int i; - - for(i = 0; i < num_transformations; ++i) { - struct radeon_program_transformation* t = transformations + i; - - if (t->function(&ctx, instr, t->userData)) - break; - } - - if (i >= num_transformations) { - struct prog_instruction* dest = radeonAppendInstructions(program, 1); - _mesa_copy_instructions(dest, instr, 1); - } - } - - _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); -} - - -static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) -{ - GLuint i; - for (i = 0; i < count; i++) { - const struct prog_instruction *inst = insts + i; - const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); - GLuint k; - - for (k = 0; k < n; k++) { - if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) - used[inst->SrcReg[k].Index] = GL_TRUE; - } - } -} - -GLint radeonFindFreeTemporary(struct radeon_transform_context *t) -{ - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i; - - _mesa_memset(used, 0, sizeof(used)); - scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); - scan_instructions(used, t->OldInstructions, t->OldNumInstructions); - - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { - if (!used[i]) - return i; - } - - return -1; -} - - -/** - * Append the given number of instructions to the program and return a - * pointer to the first new instruction. - */ -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) -{ - int oldnum = program->NumInstructions; - _mesa_insert_instructions(program, oldnum, count); - return program->Instructions + oldnum; -} diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h deleted file mode 100644 index 88474d43a2..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_H_ -#define __RADEON_PROGRAM_H_ - -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" -#include "shader/prog_instruction.h" - - -enum { - CLAUSE_MIXED = 0, - CLAUSE_ALU, - CLAUSE_TEX -}; - -enum { - PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ -}; - -enum { - OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ -}; - -#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) -#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) - -static inline GLuint get_swz(GLuint swz, GLuint idx) -{ - if (idx & 0x4) - return idx; - return GET_SWZ(swz, idx); -} - -static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) -{ - GLuint ret = 0; - - ret |= get_swz(src, swz_x); - ret |= get_swz(src, swz_y) << 3; - ret |= get_swz(src, swz_z) << 6; - ret |= get_swz(src, swz_w) << 9; - - return ret; -} - -static inline GLuint combine_swizzles(GLuint src, GLuint swz) -{ - GLuint ret = 0; - - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; - - return ret; -} - - -/** - * Transformation context that is passed to local transformations. - * - * Care must be taken with some operations during transformation, - * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary - */ -struct radeon_transform_context { - GLcontext *Ctx; - struct gl_program *Program; - struct prog_instruction *OldInstructions; - GLuint OldNumInstructions; -}; - -/** - * A transformation that can be passed to \ref radeonLocalTransform. - * - * The function will be called once for each instruction. - * It has to either emit the appropriate transformed code for the instruction - * and return GL_TRUE, or return GL_FALSE if it doesn't understand the - * instruction. - * - * The function gets passed the userData as last parameter. - */ -struct radeon_program_transformation { - GLboolean (*function)( - struct radeon_transform_context*, - struct prog_instruction*, - void*); - void *userData; -}; - -void radeonLocalTransform( - GLcontext* ctx, - struct gl_program *program, - int num_transformations, - struct radeon_program_transformation* transformations); - -/** - * Find a usable free temporary register during program transformation - */ -GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); - -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); - -#endif diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c deleted file mode 100644 index 8283723bad..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ /dev/null @@ -1,635 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * Shareable transformations that transform "special" ALU instructions - * into ALU instructions that are supported by hardware. - * - */ - -#include "radeon_program_alu.h" - -#include "shader/prog_parameter.h" - - -static struct prog_instruction *emit1(struct gl_program* p, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg) -{ - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg; - return fpi; -} - -static struct prog_instruction *emit2(struct gl_program* p, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) -{ - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; - return fpi; -} - -static struct prog_instruction *emit3(struct gl_program* p, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, - struct prog_src_register SrcReg2) -{ - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; - fpi->SrcReg[2] = SrcReg2; - return fpi; -} - -static struct prog_dst_register dstreg(int file, int index) -{ - struct prog_dst_register dst; - dst.File = file; - dst.Index = index; - dst.WriteMask = WRITEMASK_XYZW; - dst.CondMask = COND_TR; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; - return dst; -} - -static struct prog_dst_register dstregtmpmask(int index, int mask) -{ - struct prog_dst_register dst; - dst.File = PROGRAM_TEMPORARY; - dst.Index = index; - dst.WriteMask = mask; - dst.CondMask = COND_TR; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; - return dst; -} - -static const struct prog_src_register builtin_zero = { - .File = PROGRAM_BUILTIN, - .Index = 0, - .Swizzle = SWIZZLE_0000 -}; -static const struct prog_src_register builtin_one = { - .File = PROGRAM_BUILTIN, - .Index = 0, - .Swizzle = SWIZZLE_1111 -}; -static const struct prog_src_register srcreg_undefined = { - .File = PROGRAM_UNDEFINED, - .Index = 0, - .Swizzle = SWIZZLE_NOOP -}; - -static struct prog_src_register srcreg(int file, int index) -{ - struct prog_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - return src; -} - -static struct prog_src_register srcregswz(int file, int index, int swz) -{ - struct prog_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - src.Swizzle = swz; - return src; -} - -static struct prog_src_register absolute(struct prog_src_register reg) -{ - struct prog_src_register newreg = reg; - newreg.Abs = 1; - newreg.Negate = NEGATE_NONE; - return newreg; -} - -static struct prog_src_register negate(struct prog_src_register reg) -{ - struct prog_src_register newreg = reg; - newreg.Negate = newreg.Negate ^ NEGATE_XYZW; - return newreg; -} - -static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) -{ - struct prog_src_register swizzled = reg; - swizzled.Swizzle = MAKE_SWIZZLE4( - x >= 4 ? x : GET_SWZ(reg.Swizzle, x), - y >= 4 ? y : GET_SWZ(reg.Swizzle, y), - z >= 4 ? z : GET_SWZ(reg.Swizzle, z), - w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); - return swizzled; -} - -static struct prog_src_register scalar(struct prog_src_register reg) -{ - return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); -} - -static void transform_ABS(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - struct prog_src_register src = inst->SrcReg[0]; - src.Abs = 1; - src.Negate = NEGATE_NONE; - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); -} - -static void transform_DPH(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - struct prog_src_register src0 = inst->SrcReg[0]; - src0.Negate &= ~NEGATE_W; - src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= SWIZZLE_ONE << (3 * 3); - emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); -} - -/** - * [1, src0.y*src1.y, src0.z, src1.w] - * So basically MUL with lotsa swizzling. - */ -static void transform_DST(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), - swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); -} - -static void transform_FLR(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); -} - -/** - * Definition of LIT (from ARB_fragment_program): - * - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * The longest path of computation is the one leading to result.z, - * consisting of 5 operations. This implementation of LIT takes - * 5 slots, if the subsequent optimization passes are clever enough - * to pair instructions correctly. - */ -static void transform_LIT(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - static const GLfloat LitConst[4] = { -127.999999 }; - - GLuint constant; - GLuint constant_swizzle; - GLuint temp; - int needTemporary = 0; - struct prog_src_register srctemp; - - constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); - - if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } - - if (needTemporary) { - temp = radeonFindFreeTemporary(t); - } else { - temp = inst->DstReg.Index; - } - srctemp = srcreg(PROGRAM_TEMPORARY, temp); - - // tmp.x = max(0.0, Src.x); - // tmp.y = max(0.0, Src.y); - // tmp.w = clamp(Src.z, -128+eps, 128-eps); - emit2(t->Program, OPCODE_MAX, 0, - dstregtmpmask(temp, WRITEMASK_XYW), - inst->SrcReg[0], - swizzle(srcreg(PROGRAM_CONSTANT, constant), - SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); - emit2(t->Program, OPCODE_MIN, 0, - dstregtmpmask(temp, WRITEMASK_Z), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); - - // tmp.w = Pow(tmp.y, tmp.w) - emit1(t->Program, OPCODE_LG2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit2(t->Program, OPCODE_MUL, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); - emit1(t->Program, OPCODE_EX2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - - // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, - dstregtmpmask(temp, WRITEMASK_Z), - negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - builtin_zero); - - // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, - dstregtmpmask(temp, WRITEMASK_XYW), - swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); - - if (needTemporary) - emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); -} - -static void transform_LRP(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_ADD, 0, - dstreg(PROGRAM_TEMPORARY, tempreg), - inst->SrcReg[1], negate(inst->SrcReg[2])); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, - inst->DstReg, - inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); -} - -static void transform_POW(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); - struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); - tempdst.WriteMask = WRITEMASK_W; - tempsrc.Swizzle = SWIZZLE_WWWW; - - emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); - emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); - emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); -} - -static void transform_RSQ(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); -} - -static void transform_SGE(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); -} - -static void transform_SLT(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); -} - -static void transform_SUB(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); -} - -static void transform_SWZ(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); -} - -static void transform_XPD(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - negate(srcreg(PROGRAM_TEMPORARY, tempreg))); -} - - -/** - * Can be used as a transformation for @ref radeonClauseLocalTransform, - * no userData necessary. - * - * Eliminates the following ALU instructions: - * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD - * using: - * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP - * - * Transforms RSQ to Radeon's native RSQ by explicitly setting - * absolute value. - * - * @note should be applicable to R300 and R500 fragment programs. - */ -GLboolean radeonTransformALU(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - switch(inst->Opcode) { - case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; - case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; - case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; - case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; - default: - return GL_FALSE; - } -} - - -static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) -{ - static const GLfloat SinCosConsts[2][4] = { - { - 1.273239545, // 4/PI - -0.405284735, // -4/(PI*PI) - 3.141592654, // PI - 0.2225 // weight - }, - { - 0.75, - 0.5, - 0.159154943, // 1/(2*PI) - 6.283185307 // 2*PI - } - }; - int i; - - for(i = 0; i < 2; ++i) { - GLuint swz; - constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); - ASSERT(swz == SWIZZLE_NOOP); - } -} - -/** - * Approximate sin(x), where x is clamped to (-pi/2, pi/2). - * - * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } - * MAD tmp.x, tmp.y, |src|, tmp.x - * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x - * MAD dest, tmp.y, weight, tmp.x - */ -static void sin_approx(struct radeon_transform_context* t, - struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) -{ - GLuint tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcreg(PROGRAM_CONSTANT, constants[0])); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); - emit3(t->Program, OPCODE_MAD, 0, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); -} - -/** - * Translate the trigonometric functions COS, SIN, and SCS - * using only the basic instructions - * MOV, ADD, MUL, MAD, FRC - */ -GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) - return GL_FALSE; - - GLuint constants[2]; - GLuint tempreg = radeonFindFreeTemporary(t); - - sincos_constants(t, constants); - - if (inst->Opcode == OPCODE_COS) { - // MAD tmp.x, src, 1/(2*PI), 0.75 - // FRC tmp.x, tmp.x - // MAD tmp.z, tmp.x, 2*PI, -PI - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(t, inst->DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - constants); - } else if (inst->Opcode == OPCODE_SIN) { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(t, inst->DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - constants); - } else { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - struct prog_dst_register dst = inst->DstReg; - - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; - sin_approx(t, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - constants); - - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; - sin_approx(t, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - constants); - } - - return GL_TRUE; -} - - -/** - * Transform the trigonometric functions COS, SIN, and SCS - * to include pre-scaling by 1/(2*PI) and taking the fractional - * part, so that the input to COS and SIN is always in the range [0,1). - * SCS is replaced by one COS and one SIN instruction. - * - * @warning This transformation implicitly changes the semantics of SIN and COS! - */ -GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) - return GL_FALSE; - - static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; - GLuint temp; - GLuint constant; - GLuint constant_swizzle; - - temp = radeonFindFreeTemporary(t); - constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); - - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), - srcreg(PROGRAM_TEMPORARY, temp)); - - if (inst->Opcode == OPCODE_COS) { - emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SIN) { - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, - inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = inst->DstReg; - - if (inst->DstReg.WriteMask & WRITEMASK_X) { - moddst.WriteMask = WRITEMASK_X; - emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } - if (inst->DstReg.WriteMask & WRITEMASK_Y) { - moddst.WriteMask = WRITEMASK_Y; - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } - } - - return GL_TRUE; -} - -/** - * Rewrite DDX/DDY instructions to properly work with r5xx shaders. - * The r5xx MDH/MDV instruction provides per-quad partial derivatives. - * It takes the form A*B+C. A and C are set by setting src0. B should be -1. - * - * @warning This explicitly changes the form of DDX and DDY! - */ - -GLboolean radeonTransformDeriv(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) - return GL_FALSE; - - struct prog_src_register B = inst->SrcReg[1]; - - B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, - SWIZZLE_ONE, SWIZZLE_ONE); - B.Negate = NEGATE_XYZW; - - emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], B); - - return GL_TRUE; -} diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h deleted file mode 100644 index b45958115c..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_ALU_H_ -#define __RADEON_PROGRAM_ALU_H_ - -#include "radeon_program.h" - -GLboolean radeonTransformALU( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -GLboolean radeonTransformTrigSimple( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -GLboolean radeonTransformTrigScale( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -GLboolean radeonTransformDeriv( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c deleted file mode 100644 index d6fb474cf2..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ /dev/null @@ -1,1004 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * Perform temporary register allocation and attempt to pair off instructions - * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction - * vs. ALU instruction scheduling. - */ - -#include "radeon_program_pair.h" - -#include "radeon_common.h" - -#include "shader/prog_print.h" - -#define error(fmt, args...) do { \ - _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - s->Error = GL_TRUE; \ -} while(0) - -struct pair_state_instruction { - GLuint IsTex:1; /**< Is a texture instruction */ - GLuint NeedRGB:1; /**< Needs the RGB ALU */ - GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ - GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ - - /** - * Number of (read and write) dependencies that must be resolved before - * this instruction can be scheduled. - */ - GLuint NumDependencies:5; - - /** - * Next instruction in the linked list of ready instructions. - */ - struct pair_state_instruction *NextReady; - - /** - * Values that this instruction writes - */ - struct reg_value *Values[4]; -}; - - -/** - * Used to keep track of which instructions read a value. - */ -struct reg_value_reader { - GLuint IP; /**< IP of the instruction that performs this access */ - struct reg_value_reader *Next; -}; - -/** - * Used to keep track which values are stored in each component of a - * PROGRAM_TEMPORARY. - */ -struct reg_value { - GLuint IP; /**< IP of the instruction that writes this value */ - struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ - - /** - * Unordered linked list of instructions that read from this value. - */ - struct reg_value_reader *Readers; - - /** - * Number of readers of this value. This is calculated during @ref scan_instructions - * and continually decremented during code emission. - * When this count reaches zero, the instruction that writes the @ref Next value - * can be scheduled. - */ - GLuint NumReaders; -}; - -/** - * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register - * to the proper hardware temporary. - */ -struct pair_register_translation { - GLuint Allocated:1; - GLuint HwIndex:8; - GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ - - /** - * Notes the value that is currently contained in each component - * (only used for PROGRAM_TEMPORARY registers). - */ - struct reg_value *Value[4]; -}; - -struct pair_state { - GLcontext *Ctx; - struct gl_program *Program; - const struct radeon_pair_handler *Handler; - GLboolean Error; - GLboolean Debug; - GLboolean Verbose; - void *UserData; - - /** - * Translate Mesa registers to hardware registers - */ - struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; - struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; - - /** - * Derived information about program instructions. - */ - struct pair_state_instruction *Instructions; - - struct { - GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ - } HwTemps[128]; - - /** - * Linked list of instructions that can be scheduled right now, - * based on which ALU/TEX resources they require. - */ - struct pair_state_instruction *ReadyFullALU; - struct pair_state_instruction *ReadyRGB; - struct pair_state_instruction *ReadyAlpha; - struct pair_state_instruction *ReadyTEX; - - /** - * Pool of @ref reg_value structures for fast allocation. - */ - struct reg_value *ValuePool; - GLuint ValuePoolUsed; - struct reg_value_reader *ReaderPool; - GLuint ReaderPoolUsed; -}; - - -static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_INPUT: return &s->Inputs[index]; - default: return 0; - } -} - -static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) -{ - struct pair_register_translation *t = get_register(s, file, index); - ASSERT(!s->HwTemps[hwindex].RefCount); - ASSERT(!t->Allocated); - s->HwTemps[hwindex].RefCount = t->RefCount; - t->Allocated = 1; - t->HwIndex = hwindex; -} - -static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) -{ - GLuint hwindex; - - struct pair_register_translation *t = get_register(s, file, index); - if (!t) { - _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); - return 0; - } - - if (t->Allocated) - return t->HwIndex; - - for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) - if (!s->HwTemps[hwindex].RefCount) - break; - - if (hwindex >= s->Handler->MaxHwTemps) { - error("Ran out of hardware temporaries"); - return 0; - } - - alloc_hw_reg(s, file, index, hwindex); - return hwindex; -} - - -static void deref_hw_reg(struct pair_state *s, GLuint hwindex) -{ - if (!s->HwTemps[hwindex].RefCount) { - error("Hwindex %i refcount error", hwindex); - return; - } - - s->HwTemps[hwindex].RefCount--; -} - -static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) -{ - pairinst->NextReady = *list; - *list = pairinst; -} - -/** - * The instruction at the given IP has become ready. Link it into the ready - * instructions. - */ -static void instruction_ready(struct pair_state *s, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - - if (s->Verbose) - _mesa_printf("instruction_ready(%i)\n", ip); - - if (pairinst->IsTex) - add_pairinst_to_list(&s->ReadyTEX, pairinst); - else if (!pairinst->NeedAlpha) - add_pairinst_to_list(&s->ReadyRGB, pairinst); - else if (!pairinst->NeedRGB) - add_pairinst_to_list(&s->ReadyAlpha, pairinst); - else - add_pairinst_to_list(&s->ReadyFullALU, pairinst); -} - - -/** - * Finally rewrite ADD, MOV, MUL as the appropriate native instruction - * and reverse the order of arguments for CMP. - */ -static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) -{ - struct prog_src_register tmp; - - switch(inst->Opcode) { - case OPCODE_ADD: - inst->SrcReg[2] = inst->SrcReg[1]; - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].Negate = NEGATE_NONE; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_CMP: - tmp = inst->SrcReg[2]; - inst->SrcReg[2] = inst->SrcReg[0]; - inst->SrcReg[0] = tmp; - break; - case OPCODE_MOV: - /* AMD say we should use CMP. - * However, when we transform - * KIL -r0; - * into - * CMP tmp, -r0, -r0, 0; - * KIL tmp; - * we get incorrect behaviour on R500 when r0 == 0.0. - * It appears that the R500 KIL hardware treats -0.0 as less - * than zero. - */ - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_MUL: - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - default: - /* nothing to do */ - break; - } -} - - -/** - * Classify an instruction according to which ALUs etc. it needs - */ -static void classify_instruction(struct pair_state *s, - struct prog_instruction *inst, struct pair_state_instruction *pairinst) -{ - pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; - pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; - - switch(inst->Opcode) { - case OPCODE_ADD: - case OPCODE_CMP: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MAD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MOV: - case OPCODE_MUL: - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - pairinst->IsTranscendent = 1; - pairinst->NeedAlpha = 1; - break; - case OPCODE_DP4: - pairinst->NeedAlpha = 1; - /* fall through */ - case OPCODE_DP3: - pairinst->NeedRGB = 1; - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - case OPCODE_END: - pairinst->IsTex = 1; - break; - default: - error("Unknown opcode %d\n", inst->Opcode); - break; - } -} - - -/** - * Count which (input, temporary) register is read and written how often, - * and scan the instruction stream to find dependencies. - */ -static void scan_instructions(struct pair_state *s) -{ - struct prog_instruction *inst; - struct pair_state_instruction *pairinst; - GLuint ip; - - for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; - inst->Opcode != OPCODE_END; - ++inst, ++pairinst, ++ip) { - final_rewrite(s, inst); - classify_instruction(s, inst, pairinst); - - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int j; - for(j = 0; j < nsrc; j++) { - struct pair_register_translation *t = - get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); - if (!t) - continue; - - t->RefCount++; - - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - int i; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); - if (swz >= 4) - continue; /* constant or NIL swizzle */ - if (!t->Value[swz]) - continue; /* this is an undefined read */ - - /* Do not add a dependency if this instruction - * also rewrites the value. The code below adds - * a dependency for the DstReg, which is a superset - * of the SrcReg dependency. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[j].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) - continue; - - struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; - pairinst->NumDependencies++; - t->Value[swz]->NumReaders++; - r->IP = ip; - r->Next = t->Value[swz]->Readers; - t->Value[swz]->Readers = r; - } - } - } - - int ndst = _mesa_num_inst_dst_regs(inst->Opcode); - if (ndst) { - struct pair_register_translation *t = - get_register(s, inst->DstReg.File, inst->DstReg.Index); - if (t) { - t->RefCount++; - - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - int j; - for(j = 0; j < 4; ++j) { - if (!GET_BIT(inst->DstReg.WriteMask, j)) - continue; - - struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; - v->IP = ip; - if (t->Value[j]) { - pairinst->NumDependencies++; - t->Value[j]->Next = v; - } - t->Value[j] = v; - pairinst->Values[j] = v; - } - } - } - } - - if (s->Verbose) - _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); - - if (!pairinst->NumDependencies) - instruction_ready(s, ip); - } - - /* Clear the PROGRAM_TEMPORARY state */ - int i, j; - for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { - for(j = 0; j < 4; ++j) - s->Temps[i].Value[j] = 0; - } -} - - -/** - * Reserve hardware temporary registers for the program inputs. - * - * @note This allocation is performed explicitly, because the order of inputs - * is determined by the RS hardware. - */ -static void allocate_input_registers(struct pair_state *s) -{ - GLuint InputsRead = s->Program->InputsRead; - int i; - GLuint hwindex = 0; - - /* Primary colour */ - if (InputsRead & FRAG_BIT_COL0) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); - InputsRead &= ~FRAG_BIT_COL1; - - /* Texcoords */ - for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* Fogcoords treated as a texcoord */ - if (InputsRead & FRAG_BIT_FOGC) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); - InputsRead &= ~FRAG_BIT_FOGC; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); - InputsRead &= ~FRAG_BIT_WPOS; - - /* Anything else */ - if (InputsRead) - error("Don't know how to handle inputs 0x%x\n", InputsRead); -} - - -static void decrement_dependencies(struct pair_state *s, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - ASSERT(pairinst->NumDependencies > 0); - if (!--pairinst->NumDependencies) - instruction_ready(s, ip); -} - -/** - * Update the dependency tracking state based on what the instruction - * at the given IP does. - */ -static void commit_instruction(struct pair_state *s, int ip) -{ - struct prog_instruction *inst = s->Program->Instructions + ip; - struct pair_state_instruction *pairinst = s->Instructions + ip; - - if (s->Verbose) - _mesa_printf("commit_instruction(%i)\n", ip); - - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; - deref_hw_reg(s, t->HwIndex); - - int i; - for(i = 0; i < 4; ++i) { - if (!GET_BIT(inst->DstReg.WriteMask, i)) - continue; - - t->Value[i] = pairinst->Values[i]; - if (t->Value[i]->NumReaders) { - struct reg_value_reader *r; - for(r = pairinst->Values[i]->Readers; r; r = r->Next) - decrement_dependencies(s, r->IP); - } else if (t->Value[i]->Next) { - /* This happens when the only reader writes - * the register at the same time */ - decrement_dependencies(s, t->Value[i]->Next->IP); - } - } - } - - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int i; - for(i = 0; i < nsrc; i++) { - struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); - if (!t) - continue; - - deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); - - if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) - continue; - - int j; - for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz >= 4) - continue; - if (!t->Value[swz]) - continue; - - /* Do not free a dependency if this instruction - * also rewrites the value. See scan_instructions. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[i].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) - continue; - - if (!--t->Value[swz]->NumReaders) { - if (t->Value[swz]->Next) - decrement_dependencies(s, t->Value[swz]->Next->IP); - } - } - } -} - - -/** - * Emit all ready texture instructions in a single block. - * - * Emit as a single block to (hopefully) sample many textures in parallel, - * and to avoid hardware indirections on R300. - * - * In R500, we don't really know when the result of a texture instruction - * arrives. So allocate all destinations first, to make sure they do not - * arrive early and overwrite a texture coordinate we're going to use later - * in the block. - */ -static void emit_all_tex(struct pair_state *s) -{ - struct pair_state_instruction *readytex; - struct pair_state_instruction *pairinst; - - ASSERT(s->ReadyTEX); - - // Don't let the ready list change under us! - readytex = s->ReadyTEX; - s->ReadyTEX = 0; - - // Allocate destination hardware registers in one block to avoid conflicts. - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; - if (inst->Opcode != OPCODE_KIL) - get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - } - - if (s->Debug) - _mesa_printf(" BEGIN_TEX\n"); - - if (s->Handler->BeginTexBlock) - s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); - - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; - commit_instruction(s, ip); - - if (inst->Opcode != OPCODE_KIL) - inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); - - if (s->Debug) { - _mesa_printf(" "); - _mesa_print_instruction(inst); - fflush(stdout); - } - s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); - } - - if (s->Debug) - _mesa_printf(" END_TEX\n"); -} - - -static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, - struct prog_src_register src, GLboolean rgb, GLboolean alpha) -{ - int candidate = -1; - int candidate_quality = -1; - int i; - - if (!rgb && !alpha) - return 0; - - GLuint constant; - GLuint index; - - if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { - constant = 0; - index = get_hw_reg(s, src.File, src.Index); - } else { - constant = 1; - s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); - } - - for(i = 0; i < 3; ++i) { - int q = 0; - if (rgb) { - if (pair->RGB.Src[i].Used) { - if (pair->RGB.Src[i].Constant != constant || - pair->RGB.Src[i].Index != index) - continue; - q++; - } - } - if (alpha) { - if (pair->Alpha.Src[i].Used) { - if (pair->Alpha.Src[i].Constant != constant || - pair->Alpha.Src[i].Index != index) - continue; - q++; - } - } - if (q > candidate_quality) { - candidate_quality = q; - candidate = i; - } - } - - if (candidate >= 0) { - if (rgb) { - pair->RGB.Src[candidate].Used = 1; - pair->RGB.Src[candidate].Constant = constant; - pair->RGB.Src[candidate].Index = index; - } - if (alpha) { - pair->Alpha.Src[candidate].Used = 1; - pair->Alpha.Src[candidate].Constant = constant; - pair->Alpha.Src[candidate].Index = index; - } - } - - return candidate; -} - -/** - * Fill the given ALU instruction's opcodes and source operands into the given pair, - * if possible. - */ -static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; - - ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); - ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); - - if (pairinst->NeedRGB) { - if (pairinst->IsTranscendent) - pair->RGB.Opcode = OPCODE_REPL_ALPHA; - else - pair->RGB.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->RGB.Saturate = 1; - } - if (pairinst->NeedAlpha) { - pair->Alpha.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->Alpha.Saturate = 1; - } - - int nargs = _mesa_num_inst_src_regs(inst->Opcode); - int i; - - /* Special case for DDX/DDY (MDH/MDV). */ - if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { - if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) - return GL_FALSE; - else - nargs++; - } - - for(i = 0; i < nargs; ++i) { - int source; - if (pairinst->NeedRGB && !pairinst->IsTranscendent) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - int j; - for(j = 0; j < 3; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - } - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->RGB.Arg[i].Source = source; - pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; - pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); - } - if (pairinst->NeedAlpha) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->Alpha.Arg[i].Source = source; - pair->Alpha.Arg[i].Swizzle = swz; - pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); - } - } - - return GL_TRUE; -} - - -/** - * Fill in the destination register information. - * - * This is split from filling in source registers because we want - * to avoid allocating hardware temporaries for destinations until - * we are absolutely certain that we're going to emit a certain - * instruction pairing. - */ -static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; - - if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == FRAG_RESULT_COLOR) { - pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { - pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } else { - GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - if (pairinst->NeedRGB) { - pair->RGB.DestIndex = hwindex; - pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - } - if (pairinst->NeedAlpha) { - pair->Alpha.DestIndex = hwindex; - pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } -} - - -/** - * Find a good ALU instruction or pair of ALU instruction and emit it. - * - * Prefer emitting full ALU instructions, so that when we reach a point - * where no full ALU instruction can be emitted, we have more candidates - * for RGB/Alpha pairing. - */ -static void emit_alu(struct pair_state *s) -{ - struct radeon_pair_instruction pair; - - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - int ip; - if (s->ReadyFullALU) { - ip = s->ReadyFullALU - s->Instructions; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - ip = s->ReadyRGB - s->Instructions; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - ip = s->ReadyAlpha - s->Instructions; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); - } else { - struct pair_state_instruction **prgb; - struct pair_state_instruction **palpha; - - /* Some pairings might fail because they require too - * many source slots; try all possible pairings if necessary */ - for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { - for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - int rgbip = *prgb - s->Instructions; - int alphaip = *palpha - s->Instructions; - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, rgbip); - if (!fill_instruction_into_pair(s, &pair, alphaip)) - continue; - *prgb = (*prgb)->NextReady; - *palpha = (*palpha)->NextReady; - fill_dest_into_pair(s, &pair, rgbip); - fill_dest_into_pair(s, &pair, alphaip); - commit_instruction(s, rgbip); - commit_instruction(s, alphaip); - goto success; - } - } - - /* No success in pairing; just take the first RGB instruction */ - int ip = s->ReadyRGB - s->Instructions; - s->ReadyRGB = s->ReadyRGB->NextReady; - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); - success: ; - } - - if (s->Debug) - radeonPrintPairInstruction(&pair); - - s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); -} - - -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, - const struct radeon_pair_handler* handler, void *userdata) -{ - struct pair_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = _mesa_clone_program(ctx, program); - s.Handler = handler; - s.UserData = userdata; - s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; - s.Verbose = GL_FALSE && s.Debug; - - s.Instructions = (struct pair_state_instruction*)_mesa_calloc( - sizeof(struct pair_state_instruction)*s.Program->NumInstructions); - s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); - s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( - sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); - - if (s.Debug) - _mesa_printf("Emit paired program\n"); - - scan_instructions(&s); - allocate_input_registers(&s); - - while(!s.Error && - (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { - if (s.ReadyTEX) - emit_all_tex(&s); - - while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) - emit_alu(&s); - } - - if (s.Debug) - _mesa_printf(" END\n"); - - _mesa_free(s.Instructions); - _mesa_free(s.ValuePool); - _mesa_free(s.ReaderPool); - - _mesa_reference_program(ctx, &s.Program, NULL); - - return !s.Error; -} - - -static void print_pair_src(int i, struct radeon_pair_instruction_source* src) -{ - _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); -} - -static const char* opcode_string(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return "SOP"; - else - return _mesa_opcode_string(opcode); -} - -static int num_pairinst_args(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return 0; - else - return _mesa_num_inst_src_regs(opcode); -} - -static char swizzle_char(GLuint swz) -{ - switch(swz) { - case SWIZZLE_X: return 'x'; - case SWIZZLE_Y: return 'y'; - case SWIZZLE_Z: return 'z'; - case SWIZZLE_W: return 'w'; - case SWIZZLE_ZERO: return '0'; - case SWIZZLE_ONE: return '1'; - case SWIZZLE_NIL: return '_'; - default: return '?'; - } -} - -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) -{ - int nargs; - int i; - - _mesa_printf(" RGB: "); - for(i = 0; i < 3; ++i) { - if (inst->RGB.Src[i].Used) - print_pair_src(i, inst->RGB.Src + i); - } - _mesa_printf("\n"); - _mesa_printf(" Alpha:"); - for(i = 0; i < 3; ++i) { - if (inst->Alpha.Src[i].Used) - print_pair_src(i, inst->Alpha.Src + i); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); - if (inst->RGB.WriteMask) - _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, - (inst->RGB.WriteMask & 1) ? "x" : "", - (inst->RGB.WriteMask & 2) ? "y" : "", - (inst->RGB.WriteMask & 4) ? "z" : ""); - if (inst->RGB.OutputWriteMask) - _mesa_printf(" COLOR.%s%s%s", - (inst->RGB.OutputWriteMask & 1) ? "x" : "", - (inst->RGB.OutputWriteMask & 2) ? "y" : "", - (inst->RGB.OutputWriteMask & 4) ? "z" : ""); - nargs = num_pairinst_args(inst->RGB.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; - const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), - abs); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); - if (inst->Alpha.WriteMask) - _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); - if (inst->Alpha.OutputWriteMask) - _mesa_printf(" COLOR.w"); - if (inst->Alpha.DepthWriteMask) - _mesa_printf(" DEPTH.w"); - nargs = num_pairinst_args(inst->Alpha.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; - const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, - swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); - } - _mesa_printf("\n"); -} diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.h b/src/mesa/drivers/dri/r300/radeon_program_pair.h deleted file mode 100644 index 4624a24629..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_PAIR_H_ -#define __RADEON_PROGRAM_PAIR_H_ - -#include "radeon_program.h" - - -/** - * Represents a paired instruction, as found in R300 and R500 - * fragment programs. - */ -struct radeon_pair_instruction_source { - GLuint Index:8; - GLuint Constant:1; - GLuint Used:1; -}; - -struct radeon_pair_instruction_rgb { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:3; - GLuint OutputWriteMask:3; - GLuint Saturate:1; - - struct radeon_pair_instruction_source Src[3]; - - struct { - GLuint Source:2; - GLuint Swizzle:9; - GLuint Abs:1; - GLuint Negate:1; - } Arg[3]; -}; - -struct radeon_pair_instruction_alpha { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:1; - GLuint OutputWriteMask:1; - GLuint DepthWriteMask:1; - GLuint Saturate:1; - - struct radeon_pair_instruction_source Src[3]; - - struct { - GLuint Source:2; - GLuint Swizzle:3; - GLuint Abs:1; - GLuint Negate:1; - } Arg[3]; -}; - -struct radeon_pair_instruction { - struct radeon_pair_instruction_rgb RGB; - struct radeon_pair_instruction_alpha Alpha; -}; - - -/** - * - */ -struct radeon_pair_handler { - /** - * Fill in the proper hardware index for the given constant register. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); - - /** - * Write a paired instruction to the hardware. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); - - /** - * Write a texture instruction to the hardware. - * Register indices have already been rewritten to the allocated - * hardware register numbers. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitTex)(void*, struct prog_instruction*); - - /** - * Called before a block of contiguous, independent texture - * instructions is emitted. - */ - GLboolean (*BeginTexBlock)(void*); - - GLuint MaxHwTemps; -}; - -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, - const struct radeon_pair_handler*, void *userdata); - -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); - -#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 118e74008f..5f1af5b0da 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -57,7 +57,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_tex.h" #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) #include "r300_context.h" -#include "r300_fragprog.h" #include "r300_tex.h" #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) #include "r600_context.h" @@ -150,6 +149,9 @@ extern const struct dri_extension point_extensions[]; #elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600)) +#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 +#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 + /* TODO: integrate these into xmlpool.h! */ #define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \ DRI_CONF_OPT_BEGIN_V(texture_image_units,int,def, # min ":" # max ) \ -- cgit v1.2.3