summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2009-11-11 13:26:26 -0800
committerEric Anholt <eric@anholt.net>2009-11-13 13:18:22 -0800
commitd6690ce15fb8c7c6abf1bc0d847c1d2da2c33904 (patch)
treef612e5164537642d1c2c4cb1243433e47ecd7038
parent654122ba7b813683a893d60d10ca201258deface (diff)
mesa: Improve the eliminate-move-use to work across multiple instructions.
This shaves more instructions off of the VS of my GL demo, but no performance difference this time at n=6. This also fixes a regression that was in this path, which is now piglit's glsl-vs-mov-after-deref.
-rw-r--r--src/mesa/shader/prog_optimize.c126
1 files changed, 85 insertions, 41 deletions
diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c
index 8638754e24..3d28d885a4 100644
--- a/src/mesa/shader/prog_optimize.c
+++ b/src/mesa/shader/prog_optimize.c
@@ -65,6 +65,7 @@ get_src_arg_mask(const struct prog_instruction *inst, int arg)
case OPCODE_DP2:
return WRITEMASK_XY;
case OPCODE_DP3:
+ case OPCODE_XPD:
return WRITEMASK_XYZ;
default:
return WRITEMASK_XYZW;
@@ -396,6 +397,26 @@ find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
return END;
}
+static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
+{
+ switch (opcode) {
+ case OPCODE_BGNLOOP:
+ case OPCODE_BGNSUB:
+ case OPCODE_BRA:
+ case OPCODE_CAL:
+ case OPCODE_CONT:
+ case OPCODE_IF:
+ case OPCODE_ELSE:
+ case OPCODE_END:
+ case OPCODE_ENDIF:
+ case OPCODE_ENDLOOP:
+ case OPCODE_ENDSUB:
+ case OPCODE_RET:
+ return GL_TRUE;
+ default:
+ return GL_FALSE;
+ }
+}
/**
* Try to remove use of extraneous MOV instructions, to free them up for dead
@@ -404,7 +425,7 @@ find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
static void
_mesa_remove_extra_move_use(struct gl_program *prog)
{
- GLuint i;
+ GLuint i, j;
if (dbg) {
_mesa_printf("Optimize: Begin remove extra move use\n");
@@ -414,63 +435,86 @@ _mesa_remove_extra_move_use(struct gl_program *prog)
/*
* Look for sequences such as this:
* MOV tmpX, arg0;
+ * ...
* FOO tmpY, tmpX, arg1;
* and convert into:
* MOV tmpX, arg0;
+ * ...
* FOO tmpY, arg0, arg1;
*/
for (i = 0; i < prog->NumInstructions - 1; i++) {
const struct prog_instruction *mov = prog->Instructions + i;
- struct prog_instruction *inst2 = prog->Instructions + i + 1;
- int arg;
if (mov->Opcode != OPCODE_MOV ||
mov->DstReg.File != PROGRAM_TEMPORARY ||
mov->DstReg.RelAddr ||
mov->DstReg.CondMask != COND_TR ||
- mov->SaturateMode != SATURATE_OFF)
+ mov->SaturateMode != SATURATE_OFF ||
+ mov->SrcReg[0].RelAddr)
continue;
- for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
- int comp;
- int read_mask = get_src_arg_mask(inst2, arg);
-
- if (inst2->SrcReg[arg].File != mov->DstReg.File ||
- inst2->SrcReg[arg].Index != mov->DstReg.Index ||
- inst2->SrcReg[arg].RelAddr ||
- inst2->SrcReg[arg].Abs)
- continue;
-
- /* Check that all the sources for this arg of inst2 come from inst1
- * or constants.
- */
- for (comp = 0; comp < 4; comp++) {
- int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
-
- /* If the MOV didn't write that channel, can't use it. */
- if ((read_mask & (1 << comp)) &&
- src_swz <= SWIZZLE_W &&
- (mov->DstReg.WriteMask & (1 << src_swz)) == 0)
- break;
- }
- if (comp != 4)
- continue;
-
- /* Adjust the swizzles of inst2 to point at MOV's source */
- for (comp = 0; comp < 4; comp++) {
- int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
-
- if (inst2_swz <= SWIZZLE_W) {
- GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
- inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
- inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
- inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
- inst2_swz) & 0x1) << comp);
+ /* Walk through remaining instructions until the or src reg gets
+ * rewritten or we get into some flow-control, eliminating the use of
+ * this MOV.
+ */
+ for (j = i + 1; j < prog->NumInstructions; j++) {
+ struct prog_instruction *inst2 = prog->Instructions + j;
+ int arg;
+
+ if (_mesa_is_flow_control_opcode(inst2->Opcode))
+ break;
+
+ /* First rewrite this instruction's args if appropriate. */
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
+ int comp;
+ int read_mask = get_src_arg_mask(inst2, arg);
+
+ if (inst2->SrcReg[arg].File != mov->DstReg.File ||
+ inst2->SrcReg[arg].Index != mov->DstReg.Index ||
+ inst2->SrcReg[arg].RelAddr ||
+ inst2->SrcReg[arg].Abs)
+ continue;
+
+ /* Check that all the sources for this arg of inst2 come from inst1
+ * or constants.
+ */
+ for (comp = 0; comp < 4; comp++) {
+ int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
+
+ /* If the MOV didn't write that channel, can't use it. */
+ if ((read_mask & (1 << comp)) &&
+ src_swz <= SWIZZLE_W &&
+ (mov->DstReg.WriteMask & (1 << src_swz)) == 0)
+ break;
+ }
+ if (comp != 4)
+ continue;
+
+ /* Adjust the swizzles of inst2 to point at MOV's source */
+ for (comp = 0; comp < 4; comp++) {
+ int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
+
+ if (inst2_swz <= SWIZZLE_W) {
+ GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
+ inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
+ inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
+ inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
+ inst2_swz) & 0x1) << comp);
+ }
}
+ inst2->SrcReg[arg].File = mov->SrcReg[0].File;
+ inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
}
- inst2->SrcReg[arg].File = mov->SrcReg[0].File;
- inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
+
+ /* If this instruction overwrote part of the move, our time is up. */
+ if ((inst2->DstReg.File == mov->DstReg.File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->DstReg.Index)) ||
+ (inst2->DstReg.File == mov->SrcReg[0].File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->SrcReg[0].Index)))
+ break;
}
}