diff options
author | Roland Scheidegger <sroland@tungstengraphics.com> | 2008-04-12 02:40:27 +0200 |
---|---|---|
committer | Roland Scheidegger <sroland@tungstengraphics.com> | 2008-04-12 02:40:27 +0200 |
commit | 32134b5508495fbb1d9fc2f844e22fbb082dcda6 (patch) | |
tree | 02518cc8118be17a009c25facc6b3ace4eacaf91 /src | |
parent | 51ad219d6fbcdaa50e2f1b5854dfbbc4b8cab8fc (diff) |
r200: fix XPD vertex program instruction when using temps as inputs
due to the two read ports limit into temp memory may need the MAD_2 instruction
for the second instruction of the decomposed XPD.
While here, also try to avoid MAD_2 for MAD if all 3 inputs are temps but the
temps aren't actually distinct.
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_vertprog.c | 16 |
1 files changed, 13 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 7fba6c750d..b1e027dc1c 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -745,9 +745,16 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte goto next; case OPCODE_MAD: + /* only 2 read ports into temp memory thus may need the macro op MAD_2 + instead (requiring 2 clocks) if all inputs are in temp memory + (and, only if they actually reference 3 distinct temps) */ hw_op=(src[0].File == PROGRAM_TEMPORARY && src[1].File == PROGRAM_TEMPORARY && - src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; + src[2].File == PROGRAM_TEMPORARY && + (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) && + (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) && + (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ? + R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), t_dst_mask(dst.WriteMask)); @@ -875,8 +882,11 @@ else { case OPCODE_XPD: /* mul r0, r1.yzxw, r2.zxyw mad r0, -r2.yzxw, r1.zxyw, r0 - NOTE: might need MAD_2 */ + hw_op=(src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY && + (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ? + R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, @@ -902,7 +912,7 @@ else { o_inst++; u_temp_i--; - o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst), + o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), |