summary refs log tree commit diff
diff options
context:
space:
mode:
author Rune Peterson <rune@megahurts.dk> 2007-02-14 23:10:52 +0100
committer Jerome Glisse <glisse@freedesktop.org> 2007-02-14 23:10:52 +0100
commit e4c772bead57c0190f74f1fa43dd129e170567f7 (patch)
tree dfe32a524c09a2af2e3f8df9b67c444f50cb3db4
parent 1e2b46963595e02172b4d651cc8a219e9f5c8176 (diff)
r300: Fix cos & add scs to fragment program.
This commit: - Fixes COS. - Adds range reduction for SIN & COS. - Adds SCS. - Removes the optimized version of SIN & COS. - Tweaks the weight (should help precision). - Fixes a copy-paste typo in emit_arith().
-rw-r--r-- src/mesa/drivers/dri/r300/r300_context.h 2
-rw-r--r-- src/mesa/drivers/dri/r300/r300_fragprog.c 270
2 files changed, 184 insertions, 88 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index b140235159..48b50bca65 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -731,7 +731,7 @@ struct r300_fragment_program {
int max_temp_idx;
/* the index of the sin constant is stored here */
- GLint const_sin;
+ GLint const_sin[2];
GLuint optimization;
};
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index b00cf9ed33..8e45bd5403 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -33,7 +33,6 @@
/*TODO'S
*
- * - SCS instructions
* - Depth write, WPOS/FOGC inputs
* - FogOption
* - Verify results of opcodes for accuracy, I've only checked them
@@ -1081,7 +1080,7 @@ static void emit_arith(struct r300_fragment_program *rp,
break;
}
if (emit_sop &&
- (s_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_VECTOR)) {
+ (s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) {
vpos = spos = MAX2(vpos, spos);
break;
}
@@ -1204,6 +1203,25 @@ static GLuint get_attrib(struct r300_fragment_program *rp, GLuint attr)
}
#endif
+static void make_sin_const(struct r300_fragment_program *rp)
+{
+ if(rp->const_sin[0] == -1){
+ GLfloat cnstv[4];
+
+ cnstv[0] = 1.273239545; // 4/PI
+ cnstv[1] =-0.405284735; // -4/(PI*PI)
+ cnstv[2] = 3.141592654; // PI
+ cnstv[3] = 0.2225; // weight
+ rp->const_sin[0] = emit_const4fv(rp, cnstv);
+
+ cnstv[0] = 0.5;
+ cnstv[1] = -1.5;
+ cnstv[2] = 0.159154943; // 1/(2*PI)
+ cnstv[3] = 6.283185307; // 2*PI
+ rp->const_sin[1] = emit_const4fv(rp, cnstv);
+ }
+}
+
static GLboolean parse_program(struct r300_fragment_program *rp)
{
struct gl_fragment_program *mp = &rp->mesa_program;
@@ -1260,84 +1278,68 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
* cos using a parabola (see SIN):
* cos(x):
* x += PI/2
- * x = (x < PI)?x : x-2*PI
+ * x = (x/(2*PI))+0.5
+ * x = frac(x)
+ * x = (x*2*PI)-PI
* result = sin(x)
*/
temp = get_temp_reg(rp);
- if(rp->const_sin == -1){
- cnstv[0] = 1.273239545;
- cnstv[1] =-0.405284735;
- cnstv[2] = 3.141592654;
- cnstv[3] = 0.225;
- rp->const_sin = emit_const4fv(rp, cnstv);
- }
- cnst = rp->const_sin;
+ make_sin_const(rp);
src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
- emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W,
- pfs_half,
- undef,
- undef,
- 0);
+ /* add 0.5*PI and do range reduction */
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
- swizzle(cnst, Z, Z, Z, Z), //PI
+ swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI
pfs_half,
swizzle(keep(src[0]), X, X, X, X),
0);
- emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
- negate(swizzle(temp, W, W, W, W)), //-2
- swizzle(cnst, Z, Z, Z, Z), //PI
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(temp, X, X, X, X),
+ swizzle(rp->const_sin[1], Z, Z, Z, Z),
+ pfs_half,
0);
- emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
- swizzle(cnst, Z, Z, Z, Z), //PI
- negate(pfs_half),
- swizzle(src[0], X, X, X, X),
+ emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
+ swizzle(temp, X, X, X, X),
+ undef,
+ undef,
0);
-
- emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
- swizzle(temp, W, W, W, W),
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
swizzle(temp, X, X, X, X),
- swizzle(temp, Y, Y, Y, Y),
+ swizzle(rp->const_sin[1], W, W, W, W), //2*PI
+ negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI
0);
/* SIN */
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
swizzle(temp, Z, Z, Z, Z),
- cnst,
+ rp->const_sin[0],
pfs_zero,
0);
- if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){
- emit_arith(rp, PFS_OP_MAD, dest, mask,
- swizzle(temp, Y, Y, Y, Y),
- absolute(swizzle(temp, Z, Z, Z, Z)),
- swizzle(temp, X, X, X, X),
- flags);
- }else{
- emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
- swizzle(temp, Y, Y, Y, Y),
- absolute(swizzle(temp, Z, Z, Z, Z)),
- swizzle(temp, X, X, X, X),
- 0);
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+ swizzle(temp, Y, Y, Y, Y),
+ absolute(swizzle(temp, Z, Z, Z, Z)),
+ swizzle(temp, X, X, X, X),
+ 0);
- emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
- swizzle(temp, X, X, X, X),
- absolute(swizzle(temp, X, X, X, X)),
- negate(swizzle(temp, X, X, X, X)),
- 0);
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
+ swizzle(temp, X, X, X, X),
+ absolute(swizzle(temp, X, X, X, X)),
+ negate(swizzle(temp, X, X, X, X)),
+ 0);
- emit_arith(rp, PFS_OP_MAD, dest, mask,
- swizzle(temp, Y, Y, Y, Y),
- swizzle(cnst, W, W, W, W),
- swizzle(temp, X, X, X, X),
- flags);
- }
+ emit_arith(rp, PFS_OP_MAD, dest, mask,
+ swizzle(temp, Y, Y, Y, Y),
+ swizzle(rp->const_sin[0], W, W, W, W),
+ swizzle(temp, X, X, X, X),
+ flags);
+
free_temp(rp, temp);
break;
case OPCODE_DP3:
@@ -1577,7 +1579,93 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
flags);
break;
case OPCODE_SCS:
- ERROR("SCS not implemented\n");
+ /*
+ * cos using a parabola (see SIN):
+ * cos(x):
+ * x += PI/2
+ * x = (x/(2*PI))+0.5
+ * x = frac(x)
+ * x = (x*2*PI)-PI
+ * result = sin(x)
+ */
+ temp = get_temp_reg(rp);
+ make_sin_const(rp);
+ src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
+
+ /* add 0.5*PI and do range reduction */
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X|WRITEMASK_Y,
+ swizzle(rp->const_sin[0], Z, Z, Z, Z),
+ rp->const_sin[1],
+ swizzle(keep(src[0]), X, X, X, X),
+ 0);
+
+ emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_W,
+ swizzle(rp->const_sin[0], Z, Z, Z, Z),
+ negate(pfs_half),
+ swizzle(keep(src[0]), X, X, X, X),
+ 0);
+
+ emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
+ swizzle(temp, X, X, X, X),
+ swizzle(temp, Y, Y, Y, Y),
+ swizzle(temp, W, W, W, W),
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+ swizzle(temp, Z, Z, Z, Z),
+ rp->const_sin[0],
+ pfs_zero,
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+ swizzle(temp, Y, Y, Y, Y),
+ absolute(swizzle(temp, Z, Z, Z, Z)),
+ swizzle(temp, X, X, X, X),
+ 0);
+
+ if(mask & WRITEMASK_Y)
+ {
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+ swizzle(keep(src[0]), X, X, X, X),
+ rp->const_sin[0],
+ pfs_zero,
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+ swizzle(temp, Y, Y, Y, Y),
+ absolute(swizzle(keep(src[0]), X, X, X, X)),
+ swizzle(temp, X, X, X, X),
+ 0);
+ }
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
+ swizzle(temp, W, W, W, W),
+ absolute(swizzle(temp, W, W, W, W)),
+ negate(swizzle(temp, W, W, W, W)),
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_X,
+ swizzle(temp, Z, Z, Z, Z),
+ swizzle(rp->const_sin[0], W, W, W, W),
+ swizzle(temp, W, W, W, W),
+ flags);
+
+ if(mask & WRITEMASK_Y)
+ {
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+ swizzle(temp, X, X, X, X),
+ absolute(swizzle(temp, X, X, X, X)),
+ negate(swizzle(temp, X, X, X, X)),
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y,
+ swizzle(temp, W, W, W, W),
+ swizzle(rp->const_sin[0], W, W, W, W),
+ swizzle(temp, X, X, X, X),
+ flags);
+ }
+ free_temp(rp, temp);
break;
case OPCODE_SGE:
src[0] = t_src(rp, fpi->SrcReg[0]);
@@ -1603,48 +1691,56 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
*/
temp = get_temp_reg(rp);
- if(rp->const_sin == -1){
- cnstv[0] = 1.273239545;
- cnstv[1] =-0.405284735;
- cnstv[2] = 3.141592654;
- cnstv[3] = 0.225;
- rp->const_sin = emit_const4fv(rp, cnstv);
- }
- cnst = rp->const_sin;
+ make_sin_const(rp);
src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
- emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+ /* do range reduction */
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(keep(src[0]), X, X, X, X),
- cnst,
+ swizzle(rp->const_sin[1], Z, Z, Z, Z),
+ pfs_half,
+ 0);
+
+ emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
+ swizzle(temp, X, X, X, X),
+ undef,
+ undef,
+ 0);
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
+ swizzle(temp, X, X, X, X),
+ swizzle(rp->const_sin[1], W, W, W, W), //2*PI
+ negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //PI
+ 0);
+
+ /* SIN */
+
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+ swizzle(temp, Z, Z, Z, Z),
+ rp->const_sin[0],
pfs_zero,
0);
- if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){
- emit_arith(rp, PFS_OP_MAD, dest, mask,
- swizzle(temp, Y, Y, Y, Y),
- absolute(swizzle(src[0], X, X, X, X)),
- swizzle(temp, X, X, X, X),
- flags);
- }else{
- emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
- swizzle(temp, Y, Y, Y, Y),
- absolute(swizzle(src[0], X, X, X, X)),
- swizzle(temp, X, X, X, X),
- 0);
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+ swizzle(temp, Y, Y, Y, Y),
+ absolute(swizzle(temp, Z, Z, Z, Z)),
+ swizzle(temp, X, X, X, X),
+ 0);
- emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
- swizzle(temp, X, X, X, X),
- absolute(swizzle(temp, X, X, X, X)),
- negate(swizzle(temp, X, X, X, X)),
- 0);
+ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
+ swizzle(temp, X, X, X, X),
+ absolute(swizzle(temp, X, X, X, X)),
+ negate(swizzle(temp, X, X, X, X)),
+ 0);
- emit_arith(rp, PFS_OP_MAD, dest, mask,
- swizzle(temp, Y, Y, Y, Y),
- swizzle(cnst, W, W, W, W),
- swizzle(temp, X, X, X, X),
- flags);
- }
+ emit_arith(rp, PFS_OP_MAD, dest, mask,
+ swizzle(temp, Y, Y, Y, Y),
+ swizzle(rp->const_sin[0], W, W, W, W),
+ swizzle(temp, X, X, X, X),
+ flags);
+
free_temp(rp, temp);
break;
case OPCODE_SLT:
@@ -1739,7 +1835,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp)
rp->max_temp_idx = 0;
rp->node[0].alu_end = -1;
rp->node[0].tex_end = -1;
- rp->const_sin = -1;
+ rp->const_sin[0] = -1;
_mesa_memset(cs, 0, sizeof(*rp->cs));
for (i=0;i<PFS_MAX_ALU_INST;i++) {