summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2008-07-05 23:54:31 +0200
committerNicolai Haehnle <nhaehnle@gmail.com>2008-07-06 10:00:35 +0200
commit2b2cb566563b9f1f9739327ef9874143af838850 (patch)
treec9c8395293bab8913b0d3298848ec96a47892eb9 /src
parent4746752f167c674722b46ab3840297d48e6d889d (diff)
r300_fragprog: Emulate trigonometric functions in radeon_program_alu
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog.c5
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_emit.c206
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program_alu.c144
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program_alu.h5
4 files changed, 153 insertions, 207 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index 57987f5d0f..8a1d690ae4 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -408,12 +408,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
struct radeon_program_transformation transformations[] = {
{ &transform_TEX, &compiler },
- { &radeonTransformALU, 0 }
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigSimple, 0 }
};
radeonLocalTransform(
r300->radeon.glCtx,
compiler.program,
- 2, transformations);
+ 3, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Fragment Program: After transformations:\n");
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
index 30f513b5a3..4786b4554d 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
@@ -1423,40 +1423,11 @@ static void emit_arith(struct r300_pfs_compile_state *cs,
return;
}
-static GLfloat SinCosConsts[2][4] = {
- {
- 1.273239545, // 4/PI
- -0.405284735, // -4/(PI*PI)
- 3.141592654, // PI
- 0.2225 // weight
- },
- {
- 0.75,
- 0.0,
- 0.159154943, // 1/(2*PI)
- 6.283185307 // 2*PI
- }
-};
-
-static GLuint emit_sincosconsts(struct r300_pfs_compile_state *cs, int i)
-{
- struct prog_src_register srcreg;
- GLuint constant_swizzle;
-
- srcreg.File = PROGRAM_CONSTANT;
- srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters,
- SinCosConsts[i], 4, &constant_swizzle);
- srcreg.Swizzle = constant_swizzle;
-
- return emit_const4fv(cs, srcreg);
-}
-
static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi)
{
COMPILE_STATE;
- GLuint src[3], dest, temp[2];
+ GLuint src[3], dest;
int flags, mask = 0;
- int const_sin[2];
if (fpi->SaturateMode == SATURATE_ZERO_ONE)
flags = PFS_FLAG_SAT;
@@ -1485,60 +1456,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
emit_arith(cs, PFS_OP_CMP, dest, mask,
src[2], src[1], src[0], flags);
break;
- case OPCODE_COS:
- /*
- * cos using a parabola (see SIN):
- * cos(x):
- * x = (x/(2*PI))+0.75
- * x = frac(x)
- * x = (x*2*PI)-PI
- * result = sin(x)
- */
- temp[0] = get_temp_reg(cs);
- const_sin[0] = emit_sincosconsts(cs, 0);
- const_sin[1] = emit_sincosconsts(cs, 1);
- src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
-
- /* add 0.5*PI and do range reduction */
-
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
- swizzle(src[0], X, X, X, X),
- swizzle(const_sin[1], Z, Z, Z, Z),
- swizzle(const_sin[1], X, X, X, X), 0);
-
- emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X,
- swizzle(temp[0], X, X, X, X),
- undef, undef, 0);
-
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI
- negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI
- 0);
-
- /* SIN */
-
- emit_arith(cs, PFS_OP_MAD, temp[0],
- WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
- Z, Z, Z,
- Z),
- const_sin[0], pfs_zero, 0);
-
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
- swizzle(temp[0], Y, Y, Y, Y),
- absolute(swizzle(temp[0], Z, Z, Z, Z)),
- swizzle(temp[0], X, X, X, X), 0);
-
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y,
- swizzle(temp[0], X, X, X, X),
- absolute(swizzle(temp[0], X, X, X, X)),
- negate(swizzle(temp[0], X, X, X, X)), 0);
-
- emit_arith(cs, PFS_OP_MAD, dest, mask,
- swizzle(temp[0], Y, Y, Y, Y),
- swizzle(const_sin[0], W, W, W, W),
- swizzle(temp[0], X, X, X, X), flags);
-
- free_temp(cs, temp[0]);
- break;
case OPCODE_DP3:
src[0] = t_src(cs, fpi->SrcReg[0]);
src[1] = t_src(cs, fpi->SrcReg[1]);
@@ -1609,127 +1526,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
emit_arith(cs, PFS_OP_RSQ, dest, mask,
absolute(src[0]), pfs_zero, pfs_zero, flags);
break;
- case OPCODE_SCS:
- /*
- * scs using a parabola :
- * scs(x):
- * result.x = sin(-abs(x)+0.5*PI) (cos)
- * result.y = sin(x) (sin)
- *
- */
- temp[0] = get_temp_reg(cs);
- temp[1] = get_temp_reg(cs);
- const_sin[0] = emit_sincosconsts(cs, 0);
- const_sin[1] = emit_sincosconsts(cs, 1);
- src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
-
- /* x = -abs(x)+0.5*PI */
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI
- pfs_half,
- negate(abs
- (swizzle(keep(src[0]), X, X, X, X))),
- 0);
-
- /* C*x (sin) */
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W,
- swizzle(const_sin[0], Y, Y, Y, Y),
- swizzle(keep(src[0]), X, X, X, X),
- pfs_zero, 0);
-
- /* B*x, C*x (cos) */
- emit_arith(cs, PFS_OP_MAD, temp[0],
- WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
- Z, Z, Z,
- Z),
- const_sin[0], pfs_zero, 0);
-
- /* B*x (sin) */
- emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W,
- swizzle(const_sin[0], X, X, X, X),
- keep(src[0]), pfs_zero, 0);
-
- /* y = B*x + C*x*abs(x) (sin) */
- emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_Z,
- absolute(src[0]),
- swizzle(temp[0], W, W, W, W),
- swizzle(temp[1], W, W, W, W), 0);
-
- /* y = B*x + C*x*abs(x) (cos) */
- emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W,
- swizzle(temp[0], Y, Y, Y, Y),
- absolute(swizzle(temp[0], Z, Z, Z, Z)),
- swizzle(temp[0], X, X, X, X), 0);
-
- /* y*abs(y) - y (cos), y*abs(y) - y (sin) */
- emit_arith(cs, PFS_OP_MAD, temp[0],
- WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1],
- W, Z, Y,
- X),
- absolute(swizzle(temp[1], W, Z, Y, X)),
- negate(swizzle(temp[1], W, Z, Y, X)), 0);
-
- /* dest.xy = mad(temp.xy, P, temp2.wz) */
- emit_arith(cs, PFS_OP_MAD, dest,
- mask & (WRITEMASK_X | WRITEMASK_Y), temp[0],
- swizzle(const_sin[0], W, W, W, W),
- swizzle(temp[1], W, Z, Y, X), flags);
-
- free_temp(cs, temp[0]);
- free_temp(cs, temp[1]);
- break;
- case OPCODE_SIN:
- /*
- * using a parabola:
- * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x)
- * extra precision is obtained by weighting against
- * itself squared.
- */
-
- temp[0] = get_temp_reg(cs);
- const_sin[0] = emit_sincosconsts(cs, 0);
- const_sin[1] = emit_sincosconsts(cs, 1);
- src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
-
- /* do range reduction */
-
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
- swizzle(keep(src[0]), X, X, X, X),
- swizzle(const_sin[1], Z, Z, Z, Z),
- pfs_half, 0);
-
- emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X,
- swizzle(temp[0], X, X, X, X),
- undef, undef, 0);
-
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI
- negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI
- 0);
-
- /* SIN */
-
- emit_arith(cs, PFS_OP_MAD, temp[0],
- WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
- Z, Z, Z,
- Z),
- const_sin[0], pfs_zero, 0);
-
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
- swizzle(temp[0], Y, Y, Y, Y),
- absolute(swizzle(temp[0], Z, Z, Z, Z)),
- swizzle(temp[0], X, X, X, X), 0);
-
- emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y,
- swizzle(temp[0], X, X, X, X),
- absolute(swizzle(temp[0], X, X, X, X)),
- negate(swizzle(temp[0], X, X, X, X)), 0);
-
- emit_arith(cs, PFS_OP_MAD, dest, mask,
- swizzle(temp[0], Y, Y, Y, Y),
- swizzle(const_sin[0], W, W, W, W),
- swizzle(temp[0], X, X, X, X), flags);
-
- free_temp(cs, temp[0]);
- break;
case OPCODE_TEX:
emit_tex(cs, fpi, R300_TEX_OP_LD);
break;
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c
index 4a40d3e44d..fa6a67f0c1 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c
@@ -149,6 +149,14 @@ static struct prog_src_register srcregswz(int file, int index, int swz)
return src;
}
+static struct prog_src_register absolute(struct prog_src_register reg)
+{
+ struct prog_src_register newreg = reg;
+ newreg.Abs = 1;
+ newreg.NegateAbs = 0;
+ return newreg;
+}
+
static struct prog_src_register negate(struct prog_src_register reg)
{
struct prog_src_register newreg = reg;
@@ -412,3 +420,139 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t,
return GL_FALSE;
}
}
+
+
+static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
+{
+ static const GLfloat SinCosConsts[2][4] = {
+ {
+ 1.273239545, // 4/PI
+ -0.405284735, // -4/(PI*PI)
+ 3.141592654, // PI
+ 0.2225 // weight
+ },
+ {
+ 0.75,
+ 0.5,
+ 0.159154943, // 1/(2*PI)
+ 6.283185307 // 2*PI
+ }
+ };
+ int i;
+
+ for(i = 0; i < 2; ++i) {
+ GLuint swz;
+ constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
+ ASSERT(swz == SWIZZLE_NOOP);
+ }
+}
+
+/**
+ * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
+ *
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ */
+static void sin_approx(struct radeon_transform_context* t,
+ struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
+{
+ GLuint tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_MUL, dstregtmpmask(tempreg, WRITEMASK_XY),
+ swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ srcreg(PROGRAM_CONSTANT, constants[0]));
+ emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_X),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+ emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_Y),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
+ emit3(t->Program, OPCODE_MAD, dst,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+}
+
+/**
+ * Translate the trigonometric functions COS, SIN, and SCS
+ * using only the basic instructions
+ * MOV, ADD, MUL, MAD, FRC
+ */
+GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ if (inst->Opcode != OPCODE_COS &&
+ inst->Opcode != OPCODE_SIN &&
+ inst->Opcode != OPCODE_SCS)
+ return GL_FALSE;
+
+ GLuint constants[2];
+ GLuint tempreg = radeonFindFreeTemporary(t);
+
+ sincos_constants(t, constants);
+
+ if (inst->Opcode == OPCODE_COS) {
+ // MAD tmp.x, src, 1/(2*PI), 0.75
+ // FRC tmp.x, tmp.x
+ // MAD tmp.z, tmp.x, 2*PI, -PI
+ emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+ emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+ emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+ sin_approx(t, inst->DstReg,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ constants);
+ } else if (inst->Opcode == OPCODE_SIN) {
+ emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
+ emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+ emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+ sin_approx(t, inst->DstReg,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ constants);
+ } else {
+ emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_XY),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
+ emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_XY),
+ srcreg(PROGRAM_TEMPORARY, tempreg));
+ emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_XY),
+ srcreg(PROGRAM_TEMPORARY, tempreg),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+ struct prog_dst_register dst = inst->DstReg;
+
+ dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
+ sin_approx(t, dst,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ constants);
+
+ dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
+ sin_approx(t, dst,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ constants);
+ }
+
+ return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h
index 858c5ed0b8..3fe6153fd8 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.h
+++ b/src/mesa/drivers/dri/r300/radeon_program_alu.h
@@ -35,4 +35,9 @@ GLboolean radeonTransformALU(
struct prog_instruction*,
void*);
+GLboolean radeonTransformTrigSimple(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
#endif /* __RADEON_PROGRAM_ALU_H_ */