From 232a489b41097b462fc0ad2b88f0df75a1abd4c3 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <glisse@freedesktop.org>
Date: Wed, 1 Nov 2006 12:03:36 +0000
Subject: Add LIT instruction to fragment program.

---
 src/mesa/drivers/dri/r300/r300_fragprog.c | 102 +++++++++++++++++++++++++-----
 src/mesa/drivers/dri/r300/r300_fragprog.h |   3 +-
 2 files changed, 88 insertions(+), 17 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index 2d947dea3a..91ec4f855c 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -82,7 +82,8 @@ static const struct {
 	{ "LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2 },
 	{ "RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP },
 	{ "RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ },
-	{ "REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL }
+	{ "REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL },
+	{ "CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL },
 };
 
 #define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
@@ -180,7 +181,7 @@ static const pfs_reg_t undef = {
 	valid: GL_FALSE
 };
 
-/* constant zero source */
+/* constant one source */
 static const pfs_reg_t pfs_one = {
 	type: REG_TYPE_CONST,
 	index: 0,
@@ -189,7 +190,16 @@ static const pfs_reg_t pfs_one = {
 	valid: GL_TRUE
 };
 
-/* constant one source */
+/* constant half source: v_swz and s_swz both pick the HALF swizzle */
+static const pfs_reg_t pfs_half = {
+	type: REG_TYPE_CONST,
+	index: 0,
+	v_swz: SWIZZLE_HHH,
+	s_swz: SWIZZLE_HALF,
+	valid: GL_TRUE
+};
+
+/* constant zero source */
 static const pfs_reg_t pfs_zero = {
 	type: REG_TYPE_CONST,
 	index: 0,
@@ -319,7 +329,6 @@ static pfs_reg_t emit_param4fv(struct r300_fragment_program *rp,
 	return r;
 }
 
-#if 0
 static pfs_reg_t emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
 { 
 	pfs_reg_t r = undef;
@@ -330,13 +339,11 @@ static pfs_reg_t emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
 		ERROR("Out of hw constants!\n");
 		return r;
 	}
-	
-	COPY_4V(rp->constant[r.index], cp);
 
+	COPY_4V(rp->constant[r.index], cp);
 	r.valid = GL_TRUE;
 	return r;
 }
-#endif
 
 static __inline pfs_reg_t negate(pfs_reg_t r)
 {
@@ -773,13 +780,15 @@ static void emit_tex(struct r300_fragment_program *rp,
 		cs->dest_in_node = 0;
 	}
 	
-	if (rp->cur_node == 0) rp->first_node_has_tex = 1;
+	if (rp->cur_node == 0)
+		rp->first_node_has_tex = 1;
 
-    rp->tex.inst[rp->tex.length++] = 0
-        | (hwsrc << R300_FPITX_SRC_SHIFT)
-        | (hwdest << R300_FPITX_DST_SHIFT)
-        | (unit << R300_FPITX_IMAGE_SHIFT)
-        | (opcode << R300_FPITX_OPCODE_SHIFT); /* not entirely sure about this */
+	rp->tex.inst[rp->tex.length++] = 0
+		| (hwsrc << R300_FPITX_SRC_SHIFT)
+		| (hwdest << R300_FPITX_DST_SHIFT)
+		| (unit << R300_FPITX_IMAGE_SHIFT)
+		/* not entirely sure about this */
+		| (opcode << R300_FPITX_OPCODE_SHIFT);
 
 	cs->dest_in_node |= (1 << hwdest); 
 	if (coord.type != REG_TYPE_CONST)
@@ -884,7 +893,7 @@ static void emit_arith(struct r300_fragment_program *rp, int op,
 
 	vop = r300_fpop[op].v_op;
 	sop = r300_fpop[op].s_op;
-	argc = r300_fpop[op].argc;	
+	argc = r300_fpop[op].argc;
 
 	if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3)
 		emit_vop = GL_TRUE;
@@ -1039,7 +1048,9 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
 	const struct prog_instruction *inst = mp->Base.Instructions;
 	struct prog_instruction *fpi;
 	pfs_reg_t src[3], dest, temp;
+	pfs_reg_t cnst;
 	int flags, mask = 0;
+	GLfloat cnstv[4] = {0.0, 0.0, 0.0, 0.0};
 
 	if (!inst || inst[0].Opcode == OPCODE_END) {
 		ERROR("empty program?\n");
@@ -1179,7 +1190,66 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
 				   flags);
 			break;
 		case OPCODE_LIT:
-			ERROR("LIT not implemented\n");
+			/* LIT
+			 * if (s.x < 0) t.x = 0; else t.x = s.x;
+			 * if (s.y < 0) t.y = 0; else t.y = s.y;
+			 * if (s.w >  128.0) t.w =  128.0; else t.w = s.w;
+			 * if (s.w < -128.0) t.w = -128.0; else t.w = s.w;
+			 * r.x = 1.0
+			 * if (t.x > 0) r.y = pow(t.y, t.w); else r.y = 0;
+			 * Also r.y = 0 if t.y < 0
+			 * For the t.x > 0 test FGLRX uses the CMPH opcode,
+			 * which changes the compare to (t.x + 0.5) > 0.5; we
+			 * may save one instruction by doing CMP on -t.x.
+			 */
+			cnstv[0] = cnstv[1] = cnstv[2] = cnstv[3] = 0.50001;
+			src[0] = t_src(rp, fpi->SrcReg[0]);
+			temp = get_temp_reg(rp);
+			cnst = emit_const4fv(rp, cnstv);
+			emit_arith(rp, PFS_OP_CMP, temp,
+				   WRITEMASK_X | WRITEMASK_Y,
+				   src[0], pfs_zero, src[0], flags);
+			emit_arith(rp, PFS_OP_MIN, temp, WRITEMASK_Z,
+				   swizzle(keep(src[0]), W, W, W, W),
+				   cnst, undef, flags);
+			emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W,
+				   swizzle(temp, Y, Y, Y, Y),
+				   undef, undef, flags);
+			emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_Z,
+				   temp, negate(cnst), undef, flags);
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+				   temp, swizzle(temp, Z, Z, Z, Z),
+				   pfs_zero, flags);
+			emit_arith(rp, PFS_OP_EX2, temp, WRITEMASK_W,
+				   temp, undef, undef, flags);
+			emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y,
+				   swizzle(keep(temp), X, X, X, X),
+				   pfs_one, pfs_zero, flags);
+#if 0
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+				   temp, pfs_one, pfs_half, flags);
+			emit_arith(rp, PFS_OP_CMPH, temp, WRITEMASK_Z,
+				   swizzle(keep(temp), W, W, W, W),
+				   pfs_zero, swizzle(keep(temp), X, X, X, X),
+				   flags);
+#else
+			emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
+				   pfs_zero,
+				   swizzle(keep(temp), W, W, W, W),
+				   negate(swizzle(keep(temp), X, X, X, X)),
+				   flags);
+#endif
+			emit_arith(rp, PFS_OP_CMP, dest, WRITEMASK_Z,
+				   pfs_zero, temp,
+				   negate(swizzle(keep(temp), Y, Y, Y, Y)),
+				   flags);
+			emit_arith(rp, PFS_OP_MAD, dest,
+				   WRITEMASK_X | WRITEMASK_W,
+				   pfs_one,
+				   pfs_one,
+				   pfs_zero,
+				   flags);
+			free_temp(rp, temp);
 			break;
 		case OPCODE_LRP:
 			src[0] = t_src(rp, fpi->SrcReg[0]);
@@ -1345,7 +1415,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
 			return GL_FALSE;
 
 	}
-	
+
 	return GL_TRUE;
 }
 
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h
index e7dbaf973e..4bbaa07e01 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.h
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.h
@@ -78,7 +78,8 @@ typedef struct r300_fragment_program_swizzle {
 #define PFS_OP_RCP 9
 #define PFS_OP_RSQ 10
 #define PFS_OP_REPL_ALPHA 11
-#define MAX_PFS_OP 11
+#define PFS_OP_CMPH 12
+#define MAX_PFS_OP 12
 
 #define PFS_FLAG_SAT	(1 << 0)
 #define PFS_FLAG_ABS	(1 << 1)
-- 
cgit v1.2.3