From d6690ce15fb8c7c6abf1bc0d847c1d2da2c33904 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 11 Nov 2009 13:26:26 -0800 Subject: mesa: Improve the eliminate-move-use to work across multiple instructions. This shaves more instructions off of the VS of my GL demo, but no performance difference this time at n=6. This also fixes a regression that was in this path, which is now piglit's glsl-vs-mov-after-deref. --- src/mesa/shader/prog_optimize.c | 126 +++++++++++++++++++++++++++------------- 1 file changed, 85 insertions(+), 41 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c index 8638754e24..3d28d885a4 100644 --- a/src/mesa/shader/prog_optimize.c +++ b/src/mesa/shader/prog_optimize.c @@ -65,6 +65,7 @@ get_src_arg_mask(const struct prog_instruction *inst, int arg) case OPCODE_DP2: return WRITEMASK_XY; case OPCODE_DP3: + case OPCODE_XPD: return WRITEMASK_XYZ; default: return WRITEMASK_XYZW; @@ -396,6 +397,26 @@ find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index) return END; } +static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode) +{ + switch (opcode) { + case OPCODE_BGNLOOP: + case OPCODE_BGNSUB: + case OPCODE_BRA: + case OPCODE_CAL: + case OPCODE_CONT: + case OPCODE_IF: + case OPCODE_ELSE: + case OPCODE_END: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: + case OPCODE_ENDSUB: + case OPCODE_RET: + return GL_TRUE; + default: + return GL_FALSE; + } +} /** * Try to remove use of extraneous MOV instructions, to free them up for dead @@ -404,7 +425,7 @@ find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index) static void _mesa_remove_extra_move_use(struct gl_program *prog) { - GLuint i; + GLuint i, j; if (dbg) { _mesa_printf("Optimize: Begin remove extra move use\n"); @@ -414,63 +435,86 @@ _mesa_remove_extra_move_use(struct gl_program *prog) /* * Look for sequences such as this: * MOV tmpX, arg0; + * ... * FOO tmpY, tmpX, arg1; * and convert into: * MOV tmpX, arg0; + * ... * FOO tmpY, arg0, arg1; */ for (i = 0; i < prog->NumInstructions - 1; i++) { const struct prog_instruction *mov = prog->Instructions + i; - struct prog_instruction *inst2 = prog->Instructions + i + 1; - int arg; if (mov->Opcode != OPCODE_MOV || mov->DstReg.File != PROGRAM_TEMPORARY || mov->DstReg.RelAddr || mov->DstReg.CondMask != COND_TR || - mov->SaturateMode != SATURATE_OFF) + mov->SaturateMode != SATURATE_OFF || + mov->SrcReg[0].RelAddr) continue; - for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { - int comp; - int read_mask = get_src_arg_mask(inst2, arg); - - if (inst2->SrcReg[arg].File != mov->DstReg.File || - inst2->SrcReg[arg].Index != mov->DstReg.Index || - inst2->SrcReg[arg].RelAddr || - inst2->SrcReg[arg].Abs) - continue; - - /* Check that all the sources for this arg of inst2 come from inst1 - * or constants. - */ - for (comp = 0; comp < 4; comp++) { - int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); - - /* If the MOV didn't write that channel, can't use it. */ - if ((read_mask & (1 << comp)) && - src_swz <= SWIZZLE_W && - (mov->DstReg.WriteMask & (1 << src_swz)) == 0) - break; - } - if (comp != 4) - continue; - - /* Adjust the swizzles of inst2 to point at MOV's source */ - for (comp = 0; comp < 4; comp++) { - int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); - - if (inst2_swz <= SWIZZLE_W) { - GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); - inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); - inst2->SrcReg[arg].Swizzle |= s << (3 * comp); - inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> - inst2_swz) & 0x1) << comp); + /* Walk through remaining instructions until the or src reg gets + * rewritten or we get into some flow-control, eliminating the use of + * this MOV. + */ + for (j = i + 1; j < prog->NumInstructions; j++) { + struct prog_instruction *inst2 = prog->Instructions + j; + int arg; + + if (_mesa_is_flow_control_opcode(inst2->Opcode)) + break; + + /* First rewrite this instruction's args if appropriate. */ + for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { + int comp; + int read_mask = get_src_arg_mask(inst2, arg); + + if (inst2->SrcReg[arg].File != mov->DstReg.File || + inst2->SrcReg[arg].Index != mov->DstReg.Index || + inst2->SrcReg[arg].RelAddr || + inst2->SrcReg[arg].Abs) + continue; + + /* Check that all the sources for this arg of inst2 come from inst1 + * or constants. + */ + for (comp = 0; comp < 4; comp++) { + int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); + + /* If the MOV didn't write that channel, can't use it. */ + if ((read_mask & (1 << comp)) && + src_swz <= SWIZZLE_W && + (mov->DstReg.WriteMask & (1 << src_swz)) == 0) + break; + } + if (comp != 4) + continue; + + /* Adjust the swizzles of inst2 to point at MOV's source */ + for (comp = 0; comp < 4; comp++) { + int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); + + if (inst2_swz <= SWIZZLE_W) { + GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); + inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); + inst2->SrcReg[arg].Swizzle |= s << (3 * comp); + inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> + inst2_swz) & 0x1) << comp); + } } + inst2->SrcReg[arg].File = mov->SrcReg[0].File; + inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; } - inst2->SrcReg[arg].File = mov->SrcReg[0].File; - inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; + + /* If this instruction overwrote part of the move, our time is up. */ + if ((inst2->DstReg.File == mov->DstReg.File && + (inst2->DstReg.RelAddr || + inst2->DstReg.Index == mov->DstReg.Index)) || + (inst2->DstReg.File == mov->SrcReg[0].File && + (inst2->DstReg.RelAddr || + inst2->DstReg.Index == mov->SrcReg[0].Index))) + break; } } -- cgit v1.2.3