diff options
author | Rune Peterson <rune@megahurts.dk> | 2007-02-14 23:10:52 +0100 |
---|---|---|
committer | Jerome Glisse <glisse@freedesktop.org> | 2007-02-14 23:10:52 +0100 |
commit | e4c772bead57c0190f74f1fa43dd129e170567f7 (patch) | |
tree | dfe32a524c09a2af2e3f8df9b67c444f50cb3db4 | |
parent | 1e2b46963595e02172b4d651cc8a219e9f5c8176 (diff) |
r300: Fix cos & add scs to fragment program.
So this do :
- Fixes COS.
- Does range reductions for SIN & COS.
- Adds SCS.
- removes the optimized version of SIN & COS.
- tweaked weight (should help on precision).
- fixed a copy paste typo in emit_arith().
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_fragprog.c | 270 |
2 files changed, 184 insertions, 88 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index b140235159..48b50bca65 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -731,7 +731,7 @@ struct r300_fragment_program { int max_temp_idx; /* the index of the sin constant is stored here */ - GLint const_sin; + GLint const_sin[2]; GLuint optimization; }; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index b00cf9ed33..8e45bd5403 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -33,7 +33,6 @@ /*TODO'S * - * - SCS instructions * - Depth write, WPOS/FOGC inputs * - FogOption * - Verify results of opcodes for accuracy, I've only checked them @@ -1081,7 +1080,7 @@ static void emit_arith(struct r300_fragment_program *rp, break; } if (emit_sop && - (s_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_VECTOR)) { + (s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) { vpos = spos = MAX2(vpos, spos); break; } @@ -1204,6 +1203,25 @@ static GLuint get_attrib(struct r300_fragment_program *rp, GLuint attr) } #endif +static void make_sin_const(struct r300_fragment_program *rp) +{ + if(rp->const_sin[0] == -1){ + GLfloat cnstv[4]; + + cnstv[0] = 1.273239545; // 4/PI + cnstv[1] =-0.405284735; // -4/(PI*PI) + cnstv[2] = 3.141592654; // PI + cnstv[3] = 0.2225; // weight + rp->const_sin[0] = emit_const4fv(rp, cnstv); + + cnstv[0] = 0.5; + cnstv[1] = -1.5; + cnstv[2] = 0.159154943; // 1/(2*PI) + cnstv[3] = 6.283185307; // 2*PI + rp->const_sin[1] = emit_const4fv(rp, cnstv); + } +} + static GLboolean parse_program(struct r300_fragment_program *rp) { struct gl_fragment_program *mp = &rp->mesa_program; @@ -1260,84 +1278,68 @@ static GLboolean parse_program(struct r300_fragment_program *rp) * cos using a parabola (see SIN): * cos(x): * x += PI/2 - * x = (x < PI)?x : x-2*PI + * x = (x/(2*PI))+0.5 + * x = frac(x) + * x = (x*2*PI)-PI * result = sin(x) */ temp = get_temp_reg(rp); - if(rp->const_sin == -1){ - cnstv[0] = 1.273239545; - cnstv[1] =-0.405284735; - cnstv[2] = 3.141592654; - cnstv[3] = 0.225; - rp->const_sin = emit_const4fv(rp, cnstv); - } - cnst = rp->const_sin; + make_sin_const(rp); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); - emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W, - pfs_half, - undef, - undef, - 0); + /* add 0.5*PI and do range reduction */ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, - swizzle(cnst, Z, Z, Z, Z), //PI + swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI pfs_half, swizzle(keep(src[0]), X, X, X, X), 0); - emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, - negate(swizzle(temp, W, W, W, W)), //-2 - swizzle(cnst, Z, Z, Z, Z), //PI + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, swizzle(temp, X, X, X, X), + swizzle(rp->const_sin[1], Z, Z, Z, Z), + pfs_half, 0); - emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, - swizzle(cnst, Z, Z, Z, Z), //PI - negate(pfs_half), - swizzle(src[0], X, X, X, X), + emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X, + swizzle(temp, X, X, X, X), + undef, + undef, 0); - - emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z, - swizzle(temp, W, W, W, W), + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z, swizzle(temp, X, X, X, X), - swizzle(temp, Y, Y, Y, Y), + swizzle(rp->const_sin[1], W, W, W, W), //2*PI + negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI 0); /* SIN */ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y, swizzle(temp, Z, Z, Z, Z), - cnst, + rp->const_sin[0], pfs_zero, 0); - if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){ - emit_arith(rp, PFS_OP_MAD, dest, mask, - swizzle(temp, Y, Y, Y, Y), - absolute(swizzle(temp, Z, Z, Z, Z)), - swizzle(temp, X, X, X, X), - flags); - }else{ - emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, - swizzle(temp, Y, Y, Y, Y), - absolute(swizzle(temp, Z, Z, Z, Z)), - swizzle(temp, X, X, X, X), - 0); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + swizzle(temp, Y, Y, Y, Y), + absolute(swizzle(temp, Z, Z, Z, Z)), + swizzle(temp, X, X, X, X), + 0); - emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, - swizzle(temp, X, X, X, X), - absolute(swizzle(temp, X, X, X, X)), - negate(swizzle(temp, X, X, X, X)), - 0); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), + absolute(swizzle(temp, X, X, X, X)), + negate(swizzle(temp, X, X, X, X)), + 0); - emit_arith(rp, PFS_OP_MAD, dest, mask, - swizzle(temp, Y, Y, Y, Y), - swizzle(cnst, W, W, W, W), - swizzle(temp, X, X, X, X), - flags); - } + emit_arith(rp, PFS_OP_MAD, dest, mask, + swizzle(temp, Y, Y, Y, Y), + swizzle(rp->const_sin[0], W, W, W, W), + swizzle(temp, X, X, X, X), + flags); + free_temp(rp, temp); break; case OPCODE_DP3: @@ -1577,7 +1579,93 @@ static GLboolean parse_program(struct r300_fragment_program *rp) flags); break; case OPCODE_SCS: - ERROR("SCS not implemented\n"); + /* + * cos using a parabola (see SIN): + * cos(x): + * x += PI/2 + * x = (x/(2*PI))+0.5 + * x = frac(x) + * x = (x*2*PI)-PI + * result = sin(x) + */ + temp = get_temp_reg(rp); + make_sin_const(rp); + src[0] = t_scalar_src(rp, fpi->SrcReg[0]); + + /* add 0.5*PI and do range reduction */ + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X|WRITEMASK_Y, + swizzle(rp->const_sin[0], Z, Z, Z, Z), + rp->const_sin[1], + swizzle(keep(src[0]), X, X, X, X), + 0); + + emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_W, + swizzle(rp->const_sin[0], Z, Z, Z, Z), + negate(pfs_half), + swizzle(keep(src[0]), X, X, X, X), + 0); + + emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z, + swizzle(temp, X, X, X, X), + swizzle(temp, Y, Y, Y, Y), + swizzle(temp, W, W, W, W), + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y, + swizzle(temp, Z, Z, Z, Z), + rp->const_sin[0], + pfs_zero, + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, + swizzle(temp, Y, Y, Y, Y), + absolute(swizzle(temp, Z, Z, Z, Z)), + swizzle(temp, X, X, X, X), + 0); + + if(mask & WRITEMASK_Y) + { + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y, + swizzle(keep(src[0]), X, X, X, X), + rp->const_sin[0], + pfs_zero, + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + swizzle(temp, Y, Y, Y, Y), + absolute(swizzle(keep(src[0]), X, X, X, X)), + swizzle(temp, X, X, X, X), + 0); + } + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z, + swizzle(temp, W, W, W, W), + absolute(swizzle(temp, W, W, W, W)), + negate(swizzle(temp, W, W, W, W)), + 0); + + emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_X, + swizzle(temp, Z, Z, Z, Z), + swizzle(rp->const_sin[0], W, W, W, W), + swizzle(temp, W, W, W, W), + flags); + + if(mask & WRITEMASK_Y) + { + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, + swizzle(temp, X, X, X, X), + absolute(swizzle(temp, X, X, X, X)), + negate(swizzle(temp, X, X, X, X)), + 0); + + emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y, + swizzle(temp, W, W, W, W), + swizzle(rp->const_sin[0], W, W, W, W), + swizzle(temp, X, X, X, X), + flags); + } + free_temp(rp, temp); break; case OPCODE_SGE: src[0] = t_src(rp, fpi->SrcReg[0]); @@ -1603,48 +1691,56 @@ static GLboolean parse_program(struct r300_fragment_program *rp) */ temp = get_temp_reg(rp); - if(rp->const_sin == -1){ - cnstv[0] = 1.273239545; - cnstv[1] =-0.405284735; - cnstv[2] = 3.141592654; - cnstv[3] = 0.225; - rp->const_sin = emit_const4fv(rp, cnstv); - } - cnst = rp->const_sin; + make_sin_const(rp); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); - emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y, + /* do range reduction */ + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, swizzle(keep(src[0]), X, X, X, X), - cnst, + swizzle(rp->const_sin[1], Z, Z, Z, Z), + pfs_half, + 0); + + emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X, + swizzle(temp, X, X, X, X), + undef, + undef, + 0); + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z, + swizzle(temp, X, X, X, X), + swizzle(rp->const_sin[1], W, W, W, W), //2*PI + negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //PI + 0); + + /* SIN */ + + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y, + swizzle(temp, Z, Z, Z, Z), + rp->const_sin[0], pfs_zero, 0); - if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){ - emit_arith(rp, PFS_OP_MAD, dest, mask, - swizzle(temp, Y, Y, Y, Y), - absolute(swizzle(src[0], X, X, X, X)), - swizzle(temp, X, X, X, X), - flags); - }else{ - emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, - swizzle(temp, Y, Y, Y, Y), - absolute(swizzle(src[0], X, X, X, X)), - swizzle(temp, X, X, X, X), - 0); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + swizzle(temp, Y, Y, Y, Y), + absolute(swizzle(temp, Z, Z, Z, Z)), + swizzle(temp, X, X, X, X), + 0); - emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, - swizzle(temp, X, X, X, X), - absolute(swizzle(temp, X, X, X, X)), - negate(swizzle(temp, X, X, X, X)), - 0); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), + absolute(swizzle(temp, X, X, X, X)), + negate(swizzle(temp, X, X, X, X)), + 0); - emit_arith(rp, PFS_OP_MAD, dest, mask, - swizzle(temp, Y, Y, Y, Y), - swizzle(cnst, W, W, W, W), - swizzle(temp, X, X, X, X), - flags); - } + emit_arith(rp, PFS_OP_MAD, dest, mask, + swizzle(temp, Y, Y, Y, Y), + swizzle(rp->const_sin[0], W, W, W, W), + swizzle(temp, X, X, X, X), + flags); + free_temp(rp, temp); break; case OPCODE_SLT: @@ -1739,7 +1835,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) rp->max_temp_idx = 0; rp->node[0].alu_end = -1; rp->node[0].tex_end = -1; - rp->const_sin = -1; + rp->const_sin[0] = -1; _mesa_memset(cs, 0, sizeof(*rp->cs)); for (i=0;i<PFS_MAX_ALU_INST;i++) { |