summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c2
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.h39
-rw-r--r--src/mesa/drivers/dri/r300/r300_draw.c15
-rw-r--r--src/mesa/drivers/dri/r300/r300_emit.c19
-rw-r--r--src/mesa/drivers/dri/r300/r300_emit.h4
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c290
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.h6
-rw-r--r--src/mesa/drivers/dri/r300/r300_shader.c64
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c166
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.h1
-rw-r--r--src/mesa/drivers/dri/r300/r300_swtcl.c50
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c656
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.h4
-rw-r--r--src/mesa/drivers/dri/r300/radeon_nqssadce.c94
-rw-r--r--src/mesa/drivers/dri/r300/radeon_nqssadce.h7
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program.h32
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program_pair.c4
17 files changed, 883 insertions, 570 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index b7d75426c5..6f3aab986d 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -418,8 +418,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
_mesa_set_mvp_with_dp4( ctx, GL_TRUE );
- _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
-
/* Initialize the software rasterizer and helper modules.
*/
_swrast_CreateContext(ctx);
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 026c33c67c..f7af7d4e57 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -405,12 +405,13 @@ struct r300_hw_state {
#undef TAG
struct r300_vertex_program {
+ struct gl_vertex_program *Base;
struct r300_vertex_program *next;
struct r300_vertex_program_key {
- GLuint InputsRead;
- GLuint OutputsWritten;
- GLuint OutputsAdded;
+ GLuint FpReads;
+ GLuint FogAttr;
+ GLuint WPosAttr;
} key;
struct r300_vertex_shader_hw_code {
@@ -426,13 +427,17 @@ struct r300_vertex_program {
int pos_end;
int num_temporaries; /* Number of temp vars used by program */
- int wpos_idx;
int inputs[VERT_ATTRIB_MAX];
int outputs[VERT_RESULT_MAX];
};
struct r300_vertex_program_cont {
- struct gl_vertex_program mesa_program; /* Must be first */
+ /* This is the unmodified vertex program mesa provided us with.
+ * We need to keep it unchanged because we may need to create another
+ * hw specific vertex program based on this.
+ */
+ struct gl_vertex_program mesa_program;
+ /* This is the list of hw specific vertex programs derived from mesa_program */
struct r300_vertex_program *progs;
};
@@ -546,7 +551,7 @@ struct r500_fragment_program_code {
* to render with that program.
*/
struct r300_fragment_program {
- struct gl_fragment_program Base;
+ struct gl_program *Base;
GLboolean translated;
GLboolean error;
@@ -559,6 +564,23 @@ struct r300_fragment_program {
GLboolean writes_depth;
GLuint optimization;
+
+ struct r300_fragment_program *next;
+
+ /* attribute that we are sending the WPOS in */
+ gl_frag_attrib wpos_attr;
+ /* attribute that we are sending the fog coordinate in */
+ gl_frag_attrib fog_attr;
+};
+
+struct r300_fragment_program_cont {
+ /* This is the unmodified fragment program mesa provided us with.
+ * We need to keep it unchanged because we may need to create another
+ * hw specific fragment program based on this.
+ */
+ struct gl_fragment_program Base;
+ /* This is the list of hw specific fragment programs derived from Base */
+ struct r300_fragment_program *progs;
};
struct r300_fragment_program_compiler {
@@ -633,6 +655,7 @@ struct r300_context {
struct r300_hw_state hw;
struct r300_vertex_program *selected_vp;
+ struct r300_fragment_program *selected_fp;
/* Vertex buffers
*/
@@ -664,11 +687,7 @@ extern GLboolean r300CreateContext(const __GLcontextModes * glVisual,
__DRIcontextPrivate * driContextPriv,
void *sharedContextPrivate);
-extern void r300SelectVertexShader(r300ContextPtr r300);
extern void r300InitShaderFuncs(struct dd_function_table *functions);
-extern int r300VertexProgUpdateParams(GLcontext * ctx,
- struct r300_vertex_program_cont *vp,
- float *dst);
extern void r300InitShaderFunctions(r300ContextPtr r300);
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 4e8b62f186..9769ff5399 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -195,6 +195,11 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
}
GLfloat *dst_ptr, *tmp;
+
+ /* Convert value for first element only */
+ if (input->StrideB == 0)
+ count = 1;
+
tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count);
switch (input->Type) {
@@ -228,7 +233,11 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
type = GL_FLOAT;
r300_attr.free_needed = GL_TRUE;
r300_attr.data = tmp;
- r300_attr.stride = sizeof(GLfloat) * input->Size;
+ if (input->StrideB == 0) {
+ r300_attr.stride = 0;
+ } else {
+ r300_attr.stride = sizeof(GLfloat) * input->Size;
+ }
r300_attr.dwords = input->Size;
} else {
type = input->Type;
@@ -332,7 +341,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
{
int i, tmp;
- tmp = r300->selected_vp->key.InputsRead;
+ tmp = r300->selected_vp->Base->Base.InputsRead;
i = 0;
vbuf->num_attribs = 0;
while (tmp) {
@@ -428,7 +437,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
if (r300->fallback)
return GL_FALSE;
- r300SetupVAP(ctx, r300->selected_vp->key.InputsRead, r300->selected_vp->key.OutputsWritten);
+ r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten);
r300UpdateShaderStates(r300);
diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
index c3817721dc..feb3370f37 100644
--- a/src/mesa/drivers/dri/r300/r300_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_emit.c
@@ -81,17 +81,17 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
return vic_1;
}
-GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads)
+GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes)
{
GLuint ret = 0;
if (vp_writes & (1 << VERT_RESULT_HPOS))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
- if (vp_writes & (1 << VERT_RESULT_COL0) && fp_reads & FRAG_BIT_COL0)
+ if (vp_writes & (1 << VERT_RESULT_COL0))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
- if (vp_writes & (1 << VERT_RESULT_COL1) && fp_reads & FRAG_BIT_COL1)
+ if (vp_writes & (1 << VERT_RESULT_COL1))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
/* Two sided lighting works only if all 4 colors are written */
@@ -105,26 +105,17 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads)
return ret;
}
-GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads)
+GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes)
{
GLuint i, ret = 0, first_free_texcoord = 0;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (vp_writes & (1 << (VERT_RESULT_TEX0 + i)) && fp_reads & FRAG_BIT_TEX(i)) {
+ if (vp_writes & (1 << (VERT_RESULT_TEX0 + i))) {
ret |= (4 << (3 * first_free_texcoord));
++first_free_texcoord;
}
}
- if (fp_reads & FRAG_BIT_WPOS) {
- ret |= (4 << (3 * first_free_texcoord));
- ++first_free_texcoord;
- }
-
- if (vp_writes & (1 << VERT_RESULT_FOGC) && fp_reads & FRAG_BIT_FOGC) {
- ret |= 4 << (3 * first_free_texcoord);
- }
-
if (first_free_texcoord > 8) {
fprintf(stderr, "\tout of free texcoords\n");
_mesa_exit(-1);
diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
index 2fb8b82d3a..3f8c60ffae 100644
--- a/src/mesa/drivers/dri/r300/r300_emit.h
+++ b/src/mesa/drivers/dri/r300/r300_emit.h
@@ -225,7 +225,7 @@ extern void r300EmitCacheFlush(r300ContextPtr rmesa);
extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead);
extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
-extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads);
-extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads);
+extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes);
+extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes);
#endif
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index abc8757ba1..f5c4c0f4a0 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -50,13 +50,6 @@
#include "radeon_program.h"
#include "radeon_program_alu.h"
-static void update_params(GLcontext *ctx, struct gl_fragment_program *fp)
-{
- /* Ask Mesa nicely to fill in ParameterValues for us */
- if (fp->Base.Parameters)
- _mesa_load_state_parameters(ctx, fp->Base.Parameters);
-}
-
static void nqssadce_init(struct nqssadce_state* s)
{
s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
@@ -74,10 +67,12 @@ static void nqssadce_init(struct nqssadce_state* s)
*/
static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
{
- GLuint InputsRead = compiler->fp->Base.Base.InputsRead;
+ GLuint InputsRead = compiler->fp->Base->InputsRead;
- if (!(InputsRead & FRAG_BIT_WPOS))
+ if (!(InputsRead & FRAG_BIT_WPOS)) {
+ compiler->fp->wpos_attr = FRAG_ATTRIB_MAX;
return;
+ }
static gl_state_index tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
@@ -85,10 +80,23 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
struct prog_instruction *fpi;
GLuint window_index;
int i = 0;
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(InputsRead & (1 << i))) {
+ InputsRead &= ~(1 << FRAG_ATTRIB_WPOS);
+ InputsRead |= 1 << i;
+ compiler->fp->Base->InputsRead = InputsRead;
+ compiler->fp->wpos_attr = i;
+ break;
+ }
+ }
+
GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
_mesa_insert_instructions(compiler->program, 0, 3);
fpi = compiler->program->Instructions;
+ i = 0;
/* perspective divide */
fpi[i].Opcode = OPCODE_RCP;
@@ -99,7 +107,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
i++;
@@ -111,7 +119,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
@@ -154,6 +162,57 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
}
}
+
+/**
+ * Rewrite fragment.fogcoord to use a texture coordinate slot.
+ * Note that fogcoord is forced into an X001 pattern, and this enforcement
+ * is done here.
+ *
+ * See also the counterpart rewriting for vertex programs.
+ */
+static void rewriteFog(struct r300_fragment_program_compiler *compiler)
+{
+ struct r300_fragment_program *fp = compiler->fp;
+ GLuint InputsRead;
+ int i;
+
+ InputsRead = fp->Base->InputsRead;
+
+ if (!(InputsRead & FRAG_BIT_FOGC)) {
+ fp->fog_attr = FRAG_ATTRIB_MAX;
+ return;
+ }
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(InputsRead & (1 << i))) {
+ InputsRead &= ~(1 << FRAG_ATTRIB_FOGC);
+ InputsRead |= 1 << i;
+ fp->Base->InputsRead = InputsRead;
+ fp->fog_attr = i;
+ break;
+ }
+ }
+
+ {
+ struct prog_instruction *inst;
+
+ inst = compiler->program->Instructions;
+ while (inst->Opcode != OPCODE_END) {
+ const int src_regs = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < src_regs; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) {
+ inst->SrcReg[i].Index = fp->fog_attr;
+ inst->SrcReg[i].Swizzle = combine_swizzles(
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE),
+ inst->SrcReg[i].Swizzle);
+ }
+ }
+ ++inst;
+ }
+ }
+}
+
static GLuint build_dtm(GLuint depthmode)
{
switch(depthmode) {
@@ -175,7 +234,7 @@ static GLuint build_func(GLuint comparefunc)
*/
static void build_state(
r300ContextPtr r300,
- struct r300_fragment_program *fp,
+ struct gl_fragment_program *fp,
struct r300_fragment_program_external_state *state)
{
int unit;
@@ -183,7 +242,7 @@ static void build_state(
_mesa_bzero(state, sizeof(*state));
for(unit = 0; unit < 16; ++unit) {
- if (fp->Base.Base.ShadowSamplers & (1 << unit)) {
+ if (fp->Base.ShadowSamplers & (1 << unit)) {
struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
@@ -192,100 +251,149 @@ static void build_state(
}
}
-void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp)
+static void rewrite_depth_out(struct gl_program *prog)
{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
- struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp;
- struct r300_fragment_program_external_state state;
+ struct prog_instruction *inst;
- build_state(r300, r300_fp, &state);
- if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) {
- /* TODO: cache compiled programs */
- r300_fp->translated = GL_FALSE;
- _mesa_memcpy(&r300_fp->state, &state, sizeof(state));
- }
-
- if (!r300_fp->translated) {
- struct r300_fragment_program_compiler compiler;
+ for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) {
+ if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH)
+ continue;
- compiler.r300 = r300;
- compiler.fp = r300_fp;
- compiler.code = &r300_fp->code;
- compiler.program = _mesa_clone_program(ctx, &fp->Base);
+ if (inst->DstReg.WriteMask & WRITEMASK_Z) {
+ inst->DstReg.WriteMask = WRITEMASK_W;
+ } else {
+ inst->DstReg.WriteMask = 0;
+ continue;
+ }
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- fflush(stdout);
- _mesa_printf("Fragment Program: Initial program:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
+ switch (inst->Opcode) {
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ break;
+ default:
+ // Scalar instructions needn't be reswizzled
+ break;
}
+ }
+}
- insert_WPOS_trailer(&compiler);
+void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_fragment_program_compiler compiler;
+
+ compiler.r300 = r300;
+ compiler.fp = fp;
+ compiler.code = &fp->code;
+ compiler.program = fp->Base;
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ fflush(stdout);
+ _mesa_printf("Fragment Program: Initial program:\n");
+ _mesa_print_program(compiler.program);
+ fflush(stdout);
+ }
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- struct radeon_program_transformation transformations[] = {
- { &r500_transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformDeriv, 0 },
- { &radeonTransformTrigScale, 0 }
- };
- radeonLocalTransform(ctx, compiler.program, 4, transformations);
- } else {
- struct radeon_program_transformation transformations[] = {
- { &r300_transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformTrigSimple, 0 }
- };
- radeonLocalTransform(ctx, compiler.program, 3, transformations);
- }
+ insert_WPOS_trailer(&compiler);
+
+ rewriteFog(&compiler);
+
+ rewrite_depth_out(compiler.program);
+
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ struct radeon_program_transformation transformations[] = {
+ { &r500_transform_TEX, &compiler },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformDeriv, 0 },
+ { &radeonTransformTrigScale, 0 }
+ };
+ radeonLocalTransform(ctx, compiler.program, 4, transformations);
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_TEX, &compiler },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(ctx, compiler.program, 3, transformations);
+ }
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Fragment Program: After native rewrite:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
- }
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Fragment Program: After native rewrite:\n");
+ _mesa_print_program(compiler.program);
+ fflush(stdout);
+ }
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r500FPIsNativeSwizzle,
- .BuildSwizzle = &r500FPBuildSwizzle,
- .RewriteDepthOut = GL_TRUE
- };
- radeonNqssaDce(ctx, compiler.program, &nqssadce);
- } else {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r300FPIsNativeSwizzle,
- .BuildSwizzle = &r300FPBuildSwizzle,
- .RewriteDepthOut = GL_TRUE
- };
- radeonNqssaDce(ctx, compiler.program, &nqssadce);
- }
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r500FPIsNativeSwizzle,
+ .BuildSwizzle = &r500FPBuildSwizzle
+ };
+ radeonNqssaDce(ctx, compiler.program, &nqssadce);
+ } else {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r300FPIsNativeSwizzle,
+ .BuildSwizzle = &r300FPBuildSwizzle
+ };
+ radeonNqssaDce(ctx, compiler.program, &nqssadce);
+ }
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler: after NqSSA-DCE:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
- }
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ _mesa_print_program(compiler.program);
+ fflush(stdout);
+ }
- if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler))
- r300_fp->error = GL_TRUE;
+ if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler))
+ fp->error = GL_TRUE;
- /* Subtle: Rescue any parameters that have been added during transformations */
- _mesa_free_parameter_list(fp->Base.Parameters);
- fp->Base.Parameters = compiler.program->Parameters;
- compiler.program->Parameters = 0;
+ fp->translated = GL_TRUE;
- _mesa_reference_program(ctx, &compiler.program, NULL);
+ if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
+ r300->vtbl.FragmentProgramDump(&fp->code);
+}
- r300_fp->translated = GL_TRUE;
+struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_fragment_program_cont *fp_list;
+ struct r300_fragment_program *fp;
+ struct r300_fragment_program_external_state state;
- r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+ fp_list = (struct r300_fragment_program_cont *)ctx->FragmentProgram._Current;
+ build_state(r300, ctx->FragmentProgram._Current, &state);
- if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
- r300->vtbl.FragmentProgramDump(&r300_fp->code);
+ fp = fp_list->progs;
+ while (fp) {
+ if (_mesa_memcmp(&fp->state, &state, sizeof(state)) == 0) {
+ return r300->selected_fp = fp;
+ }
+ fp = fp->next;
}
- update_params(ctx, fp);
+ fp = _mesa_calloc(sizeof(struct r300_fragment_program));
+
+ fp->state = state;
+ fp->translated = GL_FALSE;
+ fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base);
+
+ fp->next = fp_list->progs;
+ fp_list->progs = fp;
+
+ return r300->selected_fp = fp;
}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
index 85ea86fecb..5e103ee408 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.h
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
@@ -30,6 +30,10 @@
#include "main/mtypes.h"
-extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp);
+#include "r300_context.h"
+
+extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp);
+
+struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx);
#endif
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
index 0133b83796..62228a3786 100644
--- a/src/mesa/drivers/dri/r300/r300_shader.c
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -32,22 +32,45 @@
#include "r300_context.h"
#include "r300_fragprog_common.h"
+static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont *cache)
+{
+ struct r300_fragment_program *tmp, *fp = cache->progs;
+
+ while (fp) {
+ tmp = fp->next;
+ _mesa_reference_program(ctx, &fp->Base, NULL);
+ _mesa_free(fp);
+ fp = tmp;
+ }
+}
+
+static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *cache)
+{
+ struct r300_vertex_program *tmp, *vp = cache->progs;
+
+ while (vp) {
+ tmp = vp->next;
+ _mesa_reference_vertprog(ctx, &vp->Base, NULL);
+ _mesa_free(vp);
+ vp = tmp;
+ }
+}
+
static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
GLuint id)
{
struct r300_vertex_program_cont *vp;
- struct r300_fragment_program *fp;
+ struct r300_fragment_program_cont *fp;
switch (target) {
case GL_VERTEX_STATE_PROGRAM_NV:
case GL_VERTEX_PROGRAM_ARB:
vp = CALLOC_STRUCT(r300_vertex_program_cont);
- return _mesa_init_vertex_program(ctx, &vp->mesa_program,
- target, id);
+ return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
case GL_FRAGMENT_PROGRAM_NV:
case GL_FRAGMENT_PROGRAM_ARB:
- fp = CALLOC_STRUCT(r300_fragment_program);
+ fp = CALLOC_STRUCT(r300_fragment_program_cont);
return _mesa_init_fragment_program(ctx, &fp->Base, target, id);
default:
@@ -59,21 +82,35 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
{
+ struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
+ struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
+
+ switch (prog->Target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vp);
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ freeFragProgCache(ctx, fp);
+ break;
+ }
+
_mesa_delete_program(ctx, prog);
}
static void
r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
- struct r300_vertex_program_cont *vp = (void *)prog;
- struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog;
+ struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
+ struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
switch (target) {
case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vp);
vp->progs = NULL;
break;
case GL_FRAGMENT_PROGRAM_ARB:
- r300_fp->translated = GL_FALSE;
+ freeFragProgCache(ctx, fp);
+ fp->progs = NULL;
break;
}
@@ -85,13 +122,18 @@ static GLboolean
r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
if (target == GL_FRAGMENT_PROGRAM_ARB) {
- struct r300_fragment_program *fp = (struct r300_fragment_program *)prog;
+ struct r300_fragment_program *fp = r300SelectFragmentShader(ctx);
if (!fp->translated)
- r300TranslateFragmentShader(ctx, &fp->Base);
+ r300TranslateFragmentShader(ctx, fp);
return !fp->error;
- } else
- return GL_TRUE;
+ } else {
+ struct r300_vertex_program *vp = r300SelectVertexShader(ctx);
+ if (!vp->translated)
+ r300TranslateVertexShader(vp);
+
+ return !vp->error;
+ }
}
void r300InitShaderFuncs(struct dd_function_table *functions)
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index c0eda977db..4d504d14e3 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -452,9 +452,9 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
{
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
- return (fp && fp->writes_depth);
+ return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth;
}
static void r300SetEarlyZState(GLcontext * ctx)
@@ -1093,24 +1093,25 @@ r300FetchStateParameter(GLcontext * ctx,
* Update R300's own internal state parameters.
* For now just STATE_R300_WINDOW_DIMENSION
*/
-void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
+static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
{
- struct r300_fragment_program *fp;
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct gl_program_parameter_list *paramList;
GLuint i;
if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
return;
- fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
- if (!fp)
+ if (!ctx->FragmentProgram._Current || !rmesa->selected_fp)
return;
- paramList = fp->Base.Base.Parameters;
+ paramList = rmesa->selected_fp->Base->Parameters;
if (!paramList)
return;
+ _mesa_load_state_parameters(ctx, paramList);
+
for (i = 0; i < paramList->NumParameters; i++) {
if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
r300FetchStateParameter(ctx,
@@ -1225,8 +1226,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
- struct r300_fragment_program_code *code = &fp->code.r300;
+ struct r300_fragment_program_code *code = &r300->selected_fp->code.r300;
R300_STATECHANGE(r300, fpt);
@@ -1266,9 +1266,9 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
- struct r500_fragment_program_code *code = &fp->code.r500;
+ struct r500_fragment_program_code *code = &r300->selected_fp->code.r500;
/* find all the texture instructions and relocate the texture units */
for (i = 0; i < code->inst_end + 1; i++) {
@@ -1316,8 +1316,6 @@ static void r300SetupTextures(GLcontext * ctx)
int hw_tmu = 0;
int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */
int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, };
- struct r300_fragment_program *fp = (struct r300_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
R300_STATECHANGE(r300, txe);
R300_STATECHANGE(r300, tex.filter);
@@ -1420,7 +1418,7 @@ static void r300SetupTextures(GLcontext * ctx)
cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
- if (fp->Base.UsesKill && last_hw_tmu < 0) {
+ if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) {
// The KILL operation requires the first texture unit
// to be enabled.
r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
@@ -1458,11 +1456,11 @@ static void r300SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+ InputsRead = r300->selected_fp->Base->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1521,29 +1519,6 @@ static void r300SetupRSUnit(GLcontext * ctx)
++fp_reg;
}
- if (InputsRead & FRAG_BIT_WPOS) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_WPOS;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- }
-
- if (InputsRead & FRAG_BIT_FOGC) {
- if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0);
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count);
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_FOGC;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- } else {
- WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
- }
- }
-
/* Setup default color if no color or tex was set */
if (rs_tex_count == 0 && col_ip == 0) {
r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0);
@@ -1575,11 +1550,11 @@ static void r500SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+ InputsRead = r300->selected_fp->Base->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1642,36 +1617,6 @@ static void r500SetupRSUnit(GLcontext * ctx)
++fp_reg;
}
- if (InputsRead & FRAG_BIT_WPOS) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
- ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
- ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
- ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
-
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_WPOS;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- }
-
- if (InputsRead & FRAG_BIT_FOGC) {
- if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) |
- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
- (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
-
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_FOGC;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- } else {
- WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
- }
- }
-
/* Setup default color if no color or tex was set */
if (rs_tex_count == 0 && col_ip == 0) {
r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0);
@@ -2036,43 +1981,51 @@ static void r300ResetHwState(r300ContextPtr r300)
void r300UpdateShaders(r300ContextPtr rmesa)
{
- GLcontext *ctx;
- struct r300_fragment_program *fp;
- int i;
-
- ctx = rmesa->radeon.glCtx;
- fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+ GLcontext *ctx = rmesa->radeon.glCtx;
/* should only happenen once, just after context is created */
/* TODO: shouldn't we fallback to sw here? */
- if (!fp) {
+ if (!ctx->FragmentProgram._Current) {
_mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
return;
}
- if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) {
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- rmesa->temp_attrib[i] =
- TNL_CONTEXT(ctx)->vb.AttribPtr[i];
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
- &rmesa->dummy_attrib[i];
- }
+ {
+ struct r300_fragment_program *fp;
- _tnl_UpdateFixedFunctionProgram(ctx);
+ fp = r300SelectFragmentShader(ctx);
+ if (!fp->translated)
+ r300TranslateFragmentShader(ctx, fp);
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
- rmesa->temp_attrib[i];
- }
-
- r300SelectVertexShader(rmesa);
- r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, rmesa->selected_vp->error);
+ r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
}
- if (!fp->translated || rmesa->radeon.NewGLState)
- r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
+ if (rmesa->options.hw_tcl_enabled) {
+ struct r300_vertex_program *vp;
+
+ if (rmesa->radeon.NewGLState) {
+ int i;
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ rmesa->temp_attrib[i] =
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i];
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+ &rmesa->dummy_attrib[i];
+ }
+
+ _tnl_UpdateFixedFunctionProgram(ctx);
+
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+ rmesa->temp_attrib[i];
+ }
+ }
+
+ vp = r300SelectVertexShader(ctx);
+ if (!vp->translated)
+ r300TranslateVertexShader(vp);
- r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
+ r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error);
+ }
r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
rmesa->radeon.NewGLState = 0;
@@ -2102,7 +2055,7 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
static void r300SetupPixelShader(GLcontext *ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+ struct r300_fragment_program *fp = rmesa->selected_fp;
struct r300_fragment_program_code *code;
int i, k;
@@ -2148,8 +2101,7 @@ static void r300SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, fpp);
rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx,
- &fp->Base.Base, code->constant[i]);
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
@@ -2174,7 +2126,7 @@ static void r300SetupPixelShader(GLcontext *ctx)
static void r500SetupPixelShader(GLcontext *ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+ struct r300_fragment_program *fp = rmesa->selected_fp;
int i;
struct r500_fragment_program_code *code;
@@ -2210,8 +2162,7 @@ static void r500SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, r500fp_const);
for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx,
- &fp->Base.Base, code->constant[i]);
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
@@ -2274,20 +2225,17 @@ void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
- rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead);
- rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten);
}
void r300UpdateShaderStates(r300ContextPtr rmesa)
{
GLcontext *ctx;
ctx = rmesa->radeon.glCtx;
- struct r300_fragment_program *r300_fp;
-
- r300_fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
/* should only happenen once, just after context is created */
- if (!r300_fp)
+ if (!ctx->FragmentProgram._Current)
return;
r300SetEarlyZState(ctx);
@@ -2323,8 +2271,6 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
R300_STATECHANGE(r300, cb);
}
- r300UpdateStateParameters(ctx, new_state);
-
r300->radeon.NewGLState |= new_state;
}
diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
index 2328289420..d46bf9f179 100644
--- a/src/mesa/drivers/dri/r300/r300_state.h
+++ b/src/mesa/drivers/dri/r300/r300_state.h
@@ -52,7 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
void r300UpdateViewportOffset (GLcontext * ctx);
void r300UpdateDrawBuffer (GLcontext * ctx);
-void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state);
void r300UpdateShaders (r300ContextPtr rmesa);
void r300UpdateShaderStates (r300ContextPtr rmesa);
void r300InitState (r300ContextPtr r300);
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index ce4179208e..56ed519cf4 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -76,7 +76,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
GLuint InputsRead = 0;
GLuint OutputsWritten = 0;
int num_attrs = 0;
- GLuint fp_reads = ctx->FragmentProgram._Current->Base.InputsRead;
+ GLuint fp_reads = rmesa->selected_fp->Base->InputsRead;
struct vertex_attribute *attrs = rmesa->vbuf.attribs;
rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
@@ -150,6 +150,22 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
}
+ if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
+
+ VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+ VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+ RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+ }
+
+ if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
+
+ VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+ VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+ RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+ }
+
/**
* Sending only one texcoord component may lead to lock up,
* so for all textures always output 4 texcoord components to RS.
@@ -192,31 +208,9 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
}
}
- /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */
- if (fp_reads & FRAG_BIT_WPOS) {
- if (first_free_tex >= ctx->Const.MaxTextureUnits) {
- fprintf(stderr, "\tout of free texcoords to write w pos\n");
- _mesa_exit(-1);
- }
-
- InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex);
- OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex);
- EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
- ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW, 0);
- ++first_free_tex;
- }
-
- if (fp_reads & FRAG_BIT_FOGC) {
- if (first_free_tex >= ctx->Const.MaxTextureUnits) {
- fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
- _mesa_exit(-1);
- }
-
- InputsRead |= 1 << VERT_ATTRIB_FOG;
- OutputsWritten |= 1 << VERT_RESULT_FOGC;
- GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
- EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F );
- ADD_ATTR(VERT_ATTRIB_FOG, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0);
+ if (first_free_tex >= ctx->Const.MaxTextureUnits) {
+ fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
+ _mesa_exit(-1);
}
R300_NEWPRIM(rmesa);
@@ -497,11 +491,13 @@ void r300RenderStart(GLcontext *ctx)
r300ContextPtr rmesa = R300_CONTEXT( ctx );
r300ChooseRenderState(ctx);
+
+ r300UpdateShaders(rmesa);
+
r300PrepareVertices(ctx);
r300ValidateBuffers(ctx);
- r300UpdateShaders(rmesa);
r300UpdateShaderStates(rmesa);
r300EmitCacheFlush(rmesa);
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index c41a8fdd62..de32013032 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -32,12 +32,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
+#include "shader/programopt.h"
#include "shader/prog_instruction.h"
+#include "shader/prog_optimize.h"
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "tnl/tnl.h"
+#include "radeon_nqssadce.h"
#include "r300_context.h"
#include "r300_state.h"
@@ -71,15 +74,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
} while (0)
-int r300VertexProgUpdateParams(GLcontext * ctx,
- struct r300_vertex_program_cont *vp, float *dst)
+static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst)
{
int pi;
- struct gl_vertex_program *mesa_vp = &vp->mesa_program;
float *dst_o = dst;
struct gl_program_parameter_list *paramList;
- if (mesa_vp->IsNVProgram) {
+ if (vp->IsNVProgram) {
_mesa_load_tracked_matrices(ctx);
for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
@@ -91,16 +92,18 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
return dst - dst_o;
}
- assert(mesa_vp->Base.Parameters);
- _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
+ if (!vp->Base.Parameters)
+ return 0;
- if (mesa_vp->Base.Parameters->NumParameters * 4 >
+ _mesa_load_state_parameters(ctx, vp->Base.Parameters);
+
+ if (vp->Base.Parameters->NumParameters * 4 >
VSF_MAX_FRAGMENT_LENGTH) {
fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
_mesa_exit(-1);
}
- paramList = mesa_vp->Base.Parameters;
+ paramList = vp->Base.Parameters;
for (pi = 0; pi < paramList->NumParameters; pi++) {
switch (paramList->Parameters[pi].Type) {
case PROGRAM_STATE_VAR:
@@ -933,10 +936,14 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
{
int i;
int cur_reg;
+ GLuint OutputsWritten, InputsRead;
+
+ OutputsWritten = vp->Base->Base.OutputsWritten;
+ InputsRead = vp->Base->Base.InputsRead;
cur_reg = -1;
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (vp->key.InputsRead & (1 << i))
+ if (InputsRead & (1 << i))
vp->inputs[i] = ++cur_reg;
else
vp->inputs[i] = -1;
@@ -946,13 +953,13 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
for (i = 0; i < VERT_RESULT_MAX; i++)
vp->outputs[i] = -1;
- assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
+ assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+ if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
}
@@ -962,46 +969,46 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
* pretend it does by skipping output index reg so the colors
* get written into appropriate output vectors.
*/
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
+ if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
vp->outputs[VERT_RESULT_COL0] = cur_reg++;
- } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
+ if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
vp->outputs[VERT_RESULT_COL1] = cur_reg++;
- } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
- } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
- } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
cur_reg++;
}
for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
- if (vp->key.OutputsWritten & (1 << i)) {
+ if (OutputsWritten & (1 << i)) {
vp->outputs[i] = cur_reg++;
}
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+ if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
}
}
-static void r300TranslateVertexShader(struct r300_vertex_program *vp,
- struct prog_instruction *vpi)
+void r300TranslateVertexShader(struct r300_vertex_program *vp)
{
+ struct prog_instruction *vpi = vp->Base->Base.Instructions;
int i;
GLuint *inst;
unsigned long num_operands;
@@ -1191,313 +1198,463 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
}
}
-/* DP4 version seems to trigger some hw peculiarity */
-//#define PREFER_DP4
+static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
+{
+ struct prog_instruction *vpi;
+
+ _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
+
+ vpi = &prog->Instructions[prog->NumInstructions - 3];
+
+ vpi->Opcode = OPCODE_MOV;
+
+ vpi->DstReg.File = PROGRAM_OUTPUT;
+ vpi->DstReg.Index = VERT_RESULT_HPOS;
+ vpi->DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi->DstReg.CondMask = COND_TR;
+
+ vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi->SrcReg[0].Index = temp_index;
+ vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++vpi;
+
+ vpi->Opcode = OPCODE_MOV;
-static void position_invariant(struct gl_program *prog)
+ vpi->DstReg.File = PROGRAM_OUTPUT;
+ vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
+ vpi->DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi->DstReg.CondMask = COND_TR;
+
+ vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi->SrcReg[0].Index = temp_index;
+ vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++vpi;
+
+ vpi->Opcode = OPCODE_END;
+}
+
+static void pos_as_texcoord(struct gl_program *prog, int tex_id)
{
struct prog_instruction *vpi;
- struct gl_program_parameter_list *paramList;
- int i;
+ GLuint tempregi = prog->NumTemporaries;
- gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
+ prog->NumTemporaries++;
- /* tokens[4] = matrix modifier */
-#ifdef PREFER_DP4
- tokens[4] = 0; /* not transposed or inverted */
-#else
- tokens[4] = STATE_MATRIX_TRANSPOSE;
-#endif
- paramList = prog->Parameters;
-
- vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
- _mesa_init_instructions(vpi, prog->NumInstructions + 4);
-
- for (i = 0; i < 4; i++) {
- GLint idx;
- tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
- idx = _mesa_add_state_reference(paramList, tokens);
-#ifdef PREFER_DP4
- vpi[i].Opcode = OPCODE_DP4;
- vpi[i].StringPos = 0;
- vpi[i].Data = 0;
-
- vpi[i].DstReg.File = PROGRAM_OUTPUT;
- vpi[i].DstReg.Index = VERT_RESULT_HPOS;
- vpi[i].DstReg.WriteMask = 1 << i;
- vpi[i].DstReg.CondMask = COND_TR;
-
- vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
- vpi[i].SrcReg[0].Index = idx;
- vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- vpi[i].SrcReg[1].File = PROGRAM_INPUT;
- vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
- vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
-#else
- if (i == 0)
- vpi[i].Opcode = OPCODE_MUL;
- else
- vpi[i].Opcode = OPCODE_MAD;
+ for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
+ vpi->DstReg.File = PROGRAM_TEMPORARY;
+ vpi->DstReg.Index = tempregi;
+ }
+ }
- vpi[i].Data = 0;
+ insert_wpos(prog, tempregi, tex_id);
- if (i == 3)
- vpi[i].DstReg.File = PROGRAM_OUTPUT;
- else
- vpi[i].DstReg.File = PROGRAM_TEMPORARY;
- vpi[i].DstReg.Index = 0;
- vpi[i].DstReg.WriteMask = 0xf;
- vpi[i].DstReg.CondMask = COND_TR;
-
- vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
- vpi[i].SrcReg[0].Index = idx;
- vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- vpi[i].SrcReg[1].File = PROGRAM_INPUT;
- vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
- vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
-
- if (i > 0) {
- vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
- vpi[i].SrcReg[2].Index = 0;
- vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
+ prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
+}
+
+/**
+ * The fogcoord attribute is special in that only the first component
+ * is relevant, and the remaining components are always fixed (when read
+ * from by the fragment program) to yield an X001 pattern.
+ *
+ * We need to enforce this either in the vertex program or in the fragment
+ * program, and this code chooses not to enforce it in the vertex program.
+ * This is slightly cheaper, as long as the fragment program does not use
+ * weird swizzles.
+ *
+ * And it seems that usually, weird swizzles are not used, so...
+ *
+ * See also the counterpart rewriting for fragment programs.
+ */
+static void fog_as_texcoord(struct gl_program *prog, int tex_id)
+{
+ struct prog_instruction *vpi;
+
+ vpi = prog->Instructions;
+ while (vpi->Opcode != OPCODE_END) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
+ vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
+ vpi->DstReg.WriteMask = WRITEMASK_X;
}
-#endif
+
+ ++vpi;
}
- _mesa_copy_instructions(&vpi[i], prog->Instructions,
- prog->NumInstructions);
+ prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
+ prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
+}
- free(prog->Instructions);
+static int translateABS(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
- prog->Instructions = vpi;
+ inst = &prog->Instructions[pos];
- prog->NumInstructions += 4;
- vpi = &prog->Instructions[prog->NumInstructions - 1];
+ inst->Opcode = OPCODE_MAX;
+ inst->SrcReg[1] = inst->SrcReg[0];
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
- assert(vpi->Opcode == OPCODE_END);
+ return 0;
}
-static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
- GLuint temp_index)
+static int translateDP3(struct gl_program *prog, int pos)
{
- struct prog_instruction *vpi;
- struct prog_instruction *vpi_insert;
- int i = 0;
+ struct prog_instruction *inst;
- vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
- _mesa_init_instructions(vpi, prog->NumInstructions + 2);
- /* all but END */
- _mesa_copy_instructions(vpi, prog->Instructions,
- prog->NumInstructions - 1);
- /* END */
- _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
- &prog->Instructions[prog->NumInstructions - 1],
- 1);
- vpi_insert = &vpi[prog->NumInstructions - 1];
+ inst = &prog->Instructions[pos];
- vpi_insert[i].Opcode = OPCODE_MOV;
+ inst->Opcode = OPCODE_DP4;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
- vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
- vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
- vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
- vpi_insert[i].DstReg.CondMask = COND_TR;
+ return 0;
+}
+
+static int translateDPH(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+
+ inst = &prog->Instructions[pos];
+
+ inst->Opcode = OPCODE_DP4;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
- vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi_insert[i].SrcReg[0].Index = temp_index;
- vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
- i++;
+ return 0;
+}
- vpi_insert[i].Opcode = OPCODE_MOV;
+static int translateFLR(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+ struct prog_dst_register dst;
+ int tmp_idx;
- vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
- vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
- vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
- vpi_insert[i].DstReg.CondMask = COND_TR;
+ tmp_idx = prog->NumTemporaries++;
- vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi_insert[i].SrcReg[0].Index = temp_index;
- vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
- i++;
+ _mesa_insert_instructions(prog, pos + 1, 1);
- free(prog->Instructions);
+ inst = &prog->Instructions[pos];
+ dst = inst->DstReg;
- prog->Instructions = vpi;
+ inst->Opcode = OPCODE_FRC;
+ inst->DstReg.File = PROGRAM_TEMPORARY;
+ inst->DstReg.Index = tmp_idx;
+ ++inst;
- prog->NumInstructions += i;
- vpi = &prog->Instructions[prog->NumInstructions - 1];
+ inst->Opcode = OPCODE_ADD;
+ inst->DstReg = dst;
+ inst->SrcReg[0] = (inst-1)->SrcReg[0];
+ inst->SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[1].Index = tmp_idx;
+ inst->SrcReg[1].Negate = NEGATE_XYZW;
- assert(vpi->Opcode == OPCODE_END);
+ return 1;
}
-static void pos_as_texcoord(struct r300_vertex_program *vp,
- struct gl_program *prog)
+static int translateSUB(struct gl_program *prog, int pos)
{
- struct prog_instruction *vpi;
- GLuint tempregi = prog->NumTemporaries;
- /* should do something else if no temps left... */
- prog->NumTemporaries++;
+ struct prog_instruction *inst;
- for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
- if (vpi->DstReg.File == PROGRAM_OUTPUT
- && vpi->DstReg.Index == VERT_RESULT_HPOS) {
- vpi->DstReg.File = PROGRAM_TEMPORARY;
- vpi->DstReg.Index = tempregi;
- }
- }
- insert_wpos(vp, prog, tempregi);
+ inst = &prog->Instructions[pos];
+
+ inst->Opcode = OPCODE_ADD;
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
+
+ return 0;
}
-static struct r300_vertex_program *build_program(struct r300_vertex_program_key
- *wanted_key, struct gl_vertex_program
- *mesa_vp, GLint wpos_idx)
+static int translateSWZ(struct gl_program *prog, int pos)
{
- struct r300_vertex_program *vp;
+ prog->Instructions[pos].Opcode = OPCODE_MOV;
- vp = _mesa_calloc(sizeof(*vp));
- _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
- vp->wpos_idx = wpos_idx;
+ return 0;
+}
- if (mesa_vp->IsPositionInvariant) {
- position_invariant(&mesa_vp->Base);
- }
+static int translateXPD(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+ int tmp_idx;
+
+ tmp_idx = prog->NumTemporaries++;
+
+ _mesa_insert_instructions(prog, pos + 1, 1);
+
+ inst = &prog->Instructions[pos];
+
+ *(inst+1) = *inst;
+
+ inst->Opcode = OPCODE_MUL;
+ inst->DstReg.File = PROGRAM_TEMPORARY;
+ inst->DstReg.Index = tmp_idx;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
+ inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
+ ++inst;
+
+ inst->Opcode = OPCODE_MAD;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
+ inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
+ inst->SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[2].Index = tmp_idx;
+
+ return 1;
+}
+
+static void translateInsts(struct gl_program *prog)
+{
+ struct prog_instruction *inst;
+ int i;
+
+ for (i = 0; i < prog->NumInstructions; ++i) {
+ inst = &prog->Instructions[i];
- if (wpos_idx > -1) {
- pos_as_texcoord(vp, &mesa_vp->Base);
+ switch (inst->Opcode) {
+ case OPCODE_ABS:
+ i += translateABS(prog, i);
+ break;
+ case OPCODE_DP3:
+ i += translateDP3(prog, i);
+ break;
+ case OPCODE_DPH:
+ i += translateDPH(prog, i);
+ break;
+ case OPCODE_FLR:
+ i += translateFLR(prog, i);
+ break;
+ case OPCODE_SUB:
+ i += translateSUB(prog, i);
+ break;
+ case OPCODE_SWZ:
+ i += translateSWZ(prog, i);
+ break;
+ case OPCODE_XPD:
+ i += translateXPD(prog, i);
+ break;
+ default:
+ break;
+ }
}
+}
- if (RADEON_DEBUG & DEBUG_VERTS) {
- fprintf(stderr, "Vertex program after native rewrite:\n");
- _mesa_print_program(&mesa_vp->Base);
- fflush(stdout);
+#define ADD_OUTPUT(fp_attr, vp_result) \
+ do { \
+ if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \
+ OutputsAdded |= 1 << (vp_result); \
+ count++; \
+ } \
+ } while (0)
+
+static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint OutputsAdded, FpReads;
+ int i, count;
+
+ OutputsAdded = 0;
+ count = 0;
+ FpReads = r300->selected_fp->Base->InputsRead;
+
+ ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
+ ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
+
+ for (i = 0; i < 7; ++i) {
+ ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
}
/* Some outputs may be artificially added, to match the inputs of the fragment program.
* Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
* vertex program are undefined, so just use MOV [vertex_result], CONST[0]
*/
- {
- int i, count = 0;
+ if (count > 0) {
+ struct prog_instruction *inst;
+
+ _mesa_insert_instructions(prog, prog->NumInstructions - 1, count);
+ inst = &prog->Instructions[prog->NumInstructions - 1 - count];
+
for (i = 0; i < VERT_RESULT_MAX; ++i) {
- if (vp->key.OutputsAdded & (1 << i)) {
- ++count;
+ if (OutputsAdded & (1 << i)) {
+ inst->Opcode = OPCODE_MOV;
+
+ inst->DstReg.File = PROGRAM_OUTPUT;
+ inst->DstReg.Index = i;
+ inst->DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->DstReg.CondMask = COND_TR;
+
+ inst->SrcReg[0].File = PROGRAM_CONSTANT;
+ inst->SrcReg[0].Index = 0;
+ inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++inst;
}
}
- if (count > 0) {
- struct prog_instruction *inst;
+ prog->OutputsWritten |= OutputsAdded;
+ }
+}
- _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count);
- inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count];
+#undef ADD_OUTPUT
- for (i = 0; i < VERT_RESULT_MAX; ++i) {
- if (vp->key.OutputsAdded & (1 << i)) {
- inst->Opcode = OPCODE_MOV;
+static void nqssadceInit(struct nqssadce_state* s)
+{
+ r300ContextPtr r300 = R300_CONTEXT(s->Ctx);
+ GLuint fp_reads;
- inst->DstReg.File = PROGRAM_OUTPUT;
- inst->DstReg.Index = i;
- inst->DstReg.WriteMask = WRITEMASK_XYZW;
- inst->DstReg.CondMask = COND_TR;
+ fp_reads = r300->selected_fp->Base->InputsRead;
+ {
+ if (fp_reads & FRAG_BIT_COL0) {
+ s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
+ s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
+ }
- inst->SrcReg[0].File = PROGRAM_CONSTANT;
- inst->SrcReg[0].Index = 0;
- inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ if (fp_reads & FRAG_BIT_COL1) {
+ s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
+ s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
+ }
+ }
- ++inst;
- }
+ {
+ int i;
+ for (i = 0; i < 8; ++i) {
+ if (fp_reads & FRAG_BIT_TEX(i)) {
+ s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
}
}
}
- assert(mesa_vp->Base.NumInstructions);
- vp->num_temporaries = mesa_vp->Base.NumTemporaries;
- r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
-
- return vp;
+ s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
+ if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
}
-static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
+static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
{
- if (key->OutputsWritten & (1 << vert))
- return;
+ (void) opcode;
+ (void) reg;
- key->OutputsWritten |= 1 << vert;
- key->OutputsAdded |= 1 << vert;
+ return GL_TRUE;
}
-void r300SelectVertexShader(r300ContextPtr r300)
+static struct r300_vertex_program *build_program(GLcontext *ctx,
+ struct r300_vertex_program_key *wanted_key,
+ const struct gl_vertex_program *mesa_vp)
{
- GLcontext *ctx = ctx = r300->radeon.glCtx;
- GLuint InputsRead;
- struct r300_vertex_program_key wanted_key = { 0 };
- GLint i;
- struct r300_vertex_program_cont *vpc;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_vertex_program *vp;
- GLint wpos_idx;
+ struct gl_program *prog;
- vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
- wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
- wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
-
- wpos_idx = -1;
- if (InputsRead & FRAG_BIT_WPOS) {
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
- if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
- break;
+ vp = _mesa_calloc(sizeof(*vp));
+ vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
+ _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
- if (i == ctx->Const.MaxTextureUnits) {
- fprintf(stderr, "\tno free texcoord found\n");
- _mesa_exit(-1);
- }
+ prog = &vp->Base->Base;
+
+ if (RADEON_DEBUG & DEBUG_VERTS) {
+ fprintf(stderr, "Initial vertex program:\n");
+ _mesa_print_program(prog);
+ fflush(stdout);
+ }
- wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
- wpos_idx = i;
+ if (vp->Base->IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, vp->Base);
}
- if (vpc->mesa_program.IsPositionInvariant) {
- wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
- wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS);
- } else {
- add_outputs(&wanted_key, VERT_RESULT_HPOS);
+ if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0);
}
- if (InputsRead & FRAG_BIT_COL0) {
- add_outputs(&wanted_key, VERT_RESULT_COL0);
+ if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+ fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0);
}
- if (InputsRead & FRAG_BIT_COL1) {
- add_outputs(&wanted_key, VERT_RESULT_COL1);
+ addArtificialOutputs(ctx, prog);
+
+ translateInsts(prog);
+
+ if (RADEON_DEBUG & DEBUG_VERTS) {
+ fprintf(stderr, "Vertex program after native rewrite:\n");
+ _mesa_print_program(prog);
+ fflush(stdout);
}
- if (InputsRead & FRAG_BIT_FOGC) {
- add_outputs(&wanted_key, VERT_RESULT_FOGC);
+ {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadceInit,
+ .IsNativeSwizzle = &swizzleIsNative,
+ .BuildSwizzle = NULL
+ };
+ radeonNqssaDce(ctx, prog, &nqssadce);
+
+ /* We need this step for reusing temporary registers */
+ _mesa_optimize_program(ctx, prog);
+
+ if (RADEON_DEBUG & DEBUG_VERTS) {
+ fprintf(stderr, "Vertex program after NQSSADCE:\n");
+ _mesa_print_program(prog);
+ fflush(stdout);
+ }
}
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (InputsRead & (FRAG_BIT_TEX0 << i)) {
- add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
+ assert(prog->NumInstructions);
+ {
+ struct prog_instruction *inst;
+ int max, i, tmp;
+
+ inst = prog->Instructions;
+ max = -1;
+ while (inst->Opcode != OPCODE_END) {
+ tmp = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < tmp; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
+ if ((int) inst->SrcReg[i].Index > max) {
+ max = inst->SrcReg[i].Index;
+ }
+ }
+ }
+
+ if (_mesa_num_inst_dst_regs(inst->Opcode)) {
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if ((int) inst->DstReg.Index > max) {
+ max = inst->DstReg.Index;
+ }
+ }
+ }
+ ++inst;
}
+
+ /* We actually want highest index of used temporary register,
+ * not the number of temporaries used.
+ * These values aren't always the same.
+ */
+ vp->num_temporaries = max + 1;
}
- for (vp = vpc->progs; vp; vp = vp->next)
+ return vp;
+}
+
+struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_program_key wanted_key = { 0 };
+ struct r300_vertex_program_cont *vpc;
+ struct r300_vertex_program *vp;
+
+ vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+ wanted_key.FpReads = r300->selected_fp->Base->InputsRead;
+ wanted_key.FogAttr = r300->selected_fp->fog_attr;
+ wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
+
+ for (vp = vpc->progs; vp; vp = vp->next) {
if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
== 0) {
- r300->selected_vp = vp;
- return;
+ return r300->selected_vp = vp;
}
-
- if (RADEON_DEBUG & DEBUG_VERTS) {
- fprintf(stderr, "Initial vertex program:\n");
- _mesa_print_program(&vpc->mesa_program.Base);
- fflush(stdout);
}
- vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
+ vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
vp->next = vpc->progs;
vpc->progs = vp;
- r300->selected_vp = vp;
+
+ return r300->selected_vp = vp;
}
#define bump_vpu_count(ptr, new_count) do { \
@@ -1544,25 +1701,22 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
struct r300_vertex_program *prog = rmesa->selected_vp;
int inst_count = 0;
int param_count = 0;
-
+
/* Reset state, in case we don't use something */
((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
-
+
R300_STATECHANGE(rmesa, vpp);
- param_count = r300VertexProgUpdateParams(ctx,
- (struct r300_vertex_program_cont *)
- ctx->VertexProgram._Current,
- (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+ param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
param_count /= 4;
r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
inst_count = (prog->hw_code.length / 4) - 1;
- r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead),
- _mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries);
+ r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead),
+ _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries);
R300_STATECHANGE(rmesa, pvs);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h
index b552e3fb1b..2dab11c337 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.h
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.h
@@ -34,4 +34,8 @@
void r300SetupVertexProgram(r300ContextPtr rmesa);
+struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx);
+
+void r300TranslateVertexShader(struct r300_vertex_program *vp);
+
#endif
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
index 4a2e1cba40..840c9733b1 100644
--- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c
+++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
@@ -46,6 +46,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil
switch(file) {
case PROGRAM_TEMPORARY: return &s->Temps[index];
case PROGRAM_OUTPUT: return &s->Outputs[index];
+ case PROGRAM_ADDRESS: return &s->Address;
default: return 0;
}
}
@@ -56,7 +57,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil
*
* @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
*/
-static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
+struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
{
struct prog_src_register tmp = srcreg;
int i;
@@ -114,47 +115,19 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
deswz_source = sourced;
}
- struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
+ struct register_state *regstate;
+
+ if (inst->SrcReg[src].RelAddr)
+ regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
+ else
+ regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
+
if (regstate)
regstate->Sourced |= deswz_source & 0xf;
return inst;
}
-
-static void rewrite_depth_out(struct prog_instruction *inst)
-{
- if (inst->DstReg.WriteMask & WRITEMASK_Z) {
- inst->DstReg.WriteMask = WRITEMASK_W;
- } else {
- inst->DstReg.WriteMask = 0;
- return;
- }
-
- switch (inst->Opcode) {
- case OPCODE_FRC:
- case OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
- break;
- default:
- // Scalar instructions needn't be reswizzled
- break;
- }
-}
-
static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
{
int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
@@ -189,11 +162,6 @@ static void process_instruction(struct nqssadce_state* s)
return;
if (inst->Opcode != OPCODE_KIL) {
- if (s->Descr->RewriteDepthOut) {
- if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH)
- rewrite_depth_out(inst);
- }
-
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
_mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
@@ -217,6 +185,7 @@ static void process_instruction(struct nqssadce_state* s)
* might change the instruction stream under us, so we have
* to be careful with the inst pointer. */
switch (inst->Opcode) {
+ case OPCODE_ARL:
case OPCODE_DDX:
case OPCODE_DDY:
case OPCODE_FRC:
@@ -227,6 +196,8 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MUL:
+ case OPCODE_SGE:
+ case OPCODE_SLT:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
break;
@@ -258,12 +229,51 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_TXP:
inst = track_used_srcreg(s, inst, 0, 0xf);
break;
+ case OPCODE_DST:
+ inst = track_used_srcreg(s, inst, 0, 0x6);
+ inst = track_used_srcreg(s, inst, 1, 0xa);
+ break;
+ case OPCODE_EXP:
+ case OPCODE_LOG:
+ case OPCODE_POW:
+ inst = track_used_srcreg(s, inst, 0, 0x3);
+ break;
+ case OPCODE_LIT:
+ inst = track_used_srcreg(s, inst, 0, 0xb);
+ break;
default:
_mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
}
+static void calculateInputsOutputs(struct gl_program *p)
+{
+ struct prog_instruction *inst;
+ GLuint InputsRead, OutputsWritten;
+
+ inst = p->Instructions;
+ InputsRead = 0;
+ OutputsWritten = 0;
+ while (inst->Opcode != OPCODE_END)
+ {
+ int i, num_src_regs;
+
+ num_src_regs = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < num_src_regs; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_INPUT)
+ InputsRead |= 1 << inst->SrcReg[i].Index;
+ }
+
+ if (inst->DstReg.File == PROGRAM_OUTPUT)
+ OutputsWritten |= 1 << inst->DstReg.Index;
+
+ ++inst;
+ }
+
+ p->InputsRead = InputsRead;
+ p->OutputsWritten = OutputsWritten;
+}
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
{
@@ -280,4 +290,6 @@ void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce
s.IP--;
process_instruction(&s);
}
+
+ calculateInputsOutputs(p);
}
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h
index a4f94abcb6..8626f21c25 100644
--- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h
+++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.h
@@ -58,6 +58,7 @@ struct nqssadce_state {
*/
struct register_state Temps[MAX_PROGRAM_TEMPS];
struct register_state Outputs[VERT_RESULT_MAX];
+ struct register_state Address;
};
@@ -83,14 +84,10 @@ struct radeon_nqssadce_descr {
*/
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
- /**
- * Rewrite instructions that write to DEPR.z to write to DEPR.w
- * instead (rewriting is done *before* the WriteMask test).
- */
- GLboolean RewriteDepthOut;
void *Data;
};
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
+struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h
index b411f69bc8..88474d43a2 100644
--- a/src/mesa/drivers/dri/r300/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/radeon_program.h
@@ -52,6 +52,38 @@ enum {
#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
+static inline GLuint get_swz(GLuint swz, GLuint idx)
+{
+ if (idx & 0x4)
+ return idx;
+ return GET_SWZ(swz, idx);
+}
+
+static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w)
+{
+ GLuint ret = 0;
+
+ ret |= get_swz(src, swz_x);
+ ret |= get_swz(src, swz_y) << 3;
+ ret |= get_swz(src, swz_z) << 6;
+ ret |= get_swz(src, swz_w) << 9;
+
+ return ret;
+}
+
+static inline GLuint combine_swizzles(GLuint src, GLuint swz)
+{
+ GLuint ret = 0;
+
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9;
+
+ return ret;
+}
+
+
/**
* Transformation context that is passed to local transformations.
*
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
index 906d36e522..d6fb474cf2 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
@@ -870,7 +870,7 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
_mesa_bzero(&s, sizeof(s));
s.Ctx = ctx;
- s.Program = program;
+ s.Program = _mesa_clone_program(ctx, program);
s.Handler = handler;
s.UserData = userdata;
s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
@@ -904,6 +904,8 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
_mesa_free(s.ValuePool);
_mesa_free(s.ReaderPool);
+ _mesa_reference_program(ctx, &s.Program, NULL);
+
return !s.Error;
}