summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/radeon_program_alu.c
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2008-07-05 22:21:24 +0200
committerNicolai Haehnle <nhaehnle@gmail.com>2008-07-06 09:59:43 +0200
commit62bccd6df0c963a14e801bcac95dc8046b978a7f (patch)
treebef7225f252c272272cc445c24c1935967554d37 /src/mesa/drivers/dri/r300/radeon_program_alu.c
parent77fdfaa23adeaaf6a217ef1ee751410c6a5b0d21 (diff)
r300: Allow adding parameters during fragprog transform, share LIT code
Diffstat (limited to 'src/mesa/drivers/dri/r300/radeon_program_alu.c')
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program_alu.c124
1 files changed, 117 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c
index d6d016d7c1..85ea810523 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c
@@ -35,6 +35,8 @@
#include "radeon_program_alu.h"
+#include "shader/prog_parameter.h"
+
static struct prog_instruction *emit1(struct gl_program* p,
gl_inst_opcode Opcode, struct prog_dst_register DstReg,
@@ -101,6 +103,19 @@ static struct prog_dst_register dstreg(int file, int index)
return dst;
}
+static struct prog_dst_register dstregtmpmask(int index, int mask)
+{
+ struct prog_dst_register dst;
+ dst.File = PROGRAM_TEMPORARY;
+ dst.Index = index;
+ dst.WriteMask = mask;
+ dst.CondMask = COND_TR;
+ dst.CondSwizzle = SWIZZLE_NOOP;
+ dst.CondSrc = 0;
+ dst.pad = 0;
+ return dst;
+}
+
static const struct prog_src_register builtin_zero = {
.File = PROGRAM_BUILTIN,
.Index = 0,
@@ -125,6 +140,15 @@ static struct prog_src_register srcreg(int file, int index)
return src;
}
+static struct prog_src_register srcregswz(int file, int index, int swz)
+{
+ struct prog_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ src.Swizzle = swz;
+ return src;
+}
+
static struct prog_src_register negate(struct prog_src_register reg)
{
struct prog_src_register newreg = reg;
@@ -136,10 +160,10 @@ static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x,
{
struct prog_src_register swizzled = reg;
swizzled.Swizzle = MAKE_SWIZZLE4(
- GET_SWZ(reg.Swizzle, x),
- GET_SWZ(reg.Swizzle, y),
- GET_SWZ(reg.Swizzle, z),
- GET_SWZ(reg.Swizzle, w));
+ x >= 4 ? x : GET_SWZ(reg.Swizzle, x),
+ y >= 4 ? y : GET_SWZ(reg.Swizzle, y),
+ z >= 4 ? z : GET_SWZ(reg.Swizzle, z),
+ w >= 4 ? w : GET_SWZ(reg.Swizzle, w));
return swizzled;
}
@@ -185,6 +209,93 @@ static void transform_FLR(struct radeon_transform_context* t,
emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
}
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */
+static void transform_LIT(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ static const GLfloat LitConst[4] = { -127.999999 };
+
+ GLuint constant;
+ GLuint constant_swizzle;
+ GLuint temp;
+ int needTemporary = 0;
+ struct prog_src_register srctemp;
+
+ constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
+
+ if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
+ needTemporary = 1;
+ } else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
+ // LIT is typically followed by DP3/DP4, so there's no point
+ // in creating special code for this case
+ needTemporary = 1;
+ }
+
+ if (needTemporary) {
+ temp = radeonFindFreeTemporary(t);
+ } else {
+ temp = inst->DstReg.Index;
+ }
+ srctemp = srcreg(PROGRAM_TEMPORARY, temp);
+
+ // tmp.x = max(0.0, Src.x);
+ // tmp.y = max(0.0, Src.y);
+ // tmp.w = clamp(Src.z, -128+eps, 128-eps);
+ emit2(t->Program, OPCODE_MAX,
+ dstregtmpmask(temp, WRITEMASK_XYW),
+ inst->SrcReg[0],
+ swizzle(srcreg(PROGRAM_CONSTANT, constant),
+ SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
+ emit2(t->Program, OPCODE_MIN,
+ dstregtmpmask(temp, WRITEMASK_Z),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
+
+ // tmp.w = Pow(tmp.y, tmp.w)
+ emit1(t->Program, OPCODE_LG2,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
+ emit2(t->Program, OPCODE_MUL,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
+ emit1(t->Program, OPCODE_EX2,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+
+ // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
+ emit3(t->Program, OPCODE_CMP,
+ dstregtmpmask(temp, WRITEMASK_Z),
+ negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ builtin_zero);
+
+ // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
+ emit1(t->Program, OPCODE_MOV,
+ dstregtmpmask(temp, WRITEMASK_XYW),
+ swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
+
+ if (needTemporary)
+ emit1(t->Program, OPCODE_MOV, inst->DstReg, srctemp);
+}
+
static void transform_POW(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
@@ -249,13 +360,11 @@ static void transform_XPD(struct radeon_transform_context* t,
* no userData necessary.
*
* Eliminates the following ALU instructions:
- * ABS, DPH, FLR, POW, SGE, SLT, SUB, SWZ, XPD
+ * ABS, DPH, FLR, LIT, POW, SGE, SLT, SUB, SWZ, XPD
* using:
* MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
*
* @note should be applicable to R300 and R500 fragment programs.
- *
- * @todo add LIT here as well?
*/
GLboolean radeonTransformALU(struct radeon_transform_context* t,
struct prog_instruction* inst,
@@ -265,6 +374,7 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t,
case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
+ case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;