CELL: finish fragment ops blending (except for unusual D3D modes)

- Added new "macro" functions spe_float_min() and spe_float_max() to rtasm_ppc_spe.{ch}. These emit instructions that cause the minimum or maximum of each element in a vector of floats to be saved in the destination register. - Major changes to cell_gen_fragment.c to implement all the blending modes (except for the mysterious D3D-based PIPE_BLENDFACTOR_SRC1_COLOR, PIPE_BLENDFACTOR_SRC1_ALPHA, PIPE_BLENDFACTOR_INV_SRC1_COLOR, and PIPE_BLENDFACTOR_INV_SRC1_ALPHA). - Some revamping of code in cell_gen_fragment.c: use the new spe_float_min() and spe_float_max() functions (instead of expanding these calculations inline via macros); create and use an inline utility function for handling "optional" register allocation (for the {1,1,1,1} vector, and the blend color vectors) instead of expanding with macros; use the Float Multiply and Subtract (fnms) instruction to simplify and optimize many blending calculations.
author: Robert Ellison <papillo@tungstengraphics.com> 2008-09-18 01:29:41 -0600
committer: Robert Ellison <papillo@tungstengraphics.com> 2008-09-18 01:29:41 -0600
commit: f8bba34d4e12ef4c620cac881a4b697a1e668377 (patch)
tree: 5bc0c7927202e26b5566bdbd479f51ff573e4c37 /src/gallium/auxiliary/rtasm
parent: f631093ce76ad14dee63293761d7da7b7b42fc6d (diff)
2 files changed, 48 insertions, 1 deletions
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index 870ae802c5..12e0826fb9 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -384,7 +384,7 @@ void spe_release_func(struct spe_function *p)
 
 
 /**
- * Alloate a SPE register.
+ * Allocate a SPE register.
  * \return register index or -1 if none left.
  */
 int spe_allocate_available_register(struct spe_function *p)
@@ -646,5 +646,44 @@ spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word)
    }
 }
 
+/* For each 32-bit float element of rA and rB, choose the smaller of the
+ * two, compositing them into the rT register.
+ * 
+ * The Float Compare Greater Than (fcgt) instruction will put 1s into
+ * compare_reg where rA > rB, and 0s where rA <= rB.
+ *
+ * Then the Select Bits (selb) instruction will take bits from rA where
+ * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA
+ * where rA <= rB and from rB where rB > rA, which is exactly the
+ * "min" operation.
+ *
+ * The compare_reg could in many cases be the same as rT, unless
+ * rT == rA || rt == rB.  But since this is common in constructions
+ * like "x = min(x, a)", we always allocate a new register to be safe.
+ */
+void 
+spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB)
+{
+   unsigned int compare_reg = spe_allocate_available_register(p);
+   spe_fcgt(p, compare_reg, rA, rB);
+   spe_selb(p, rT, rA, rB, compare_reg);
+   spe_release_register(p, compare_reg);
+}
+
+/* For each 32-bit float element of rA and rB, choose the greater of the
+ * two, compositing them into the rT register.
+ * 
+ * The logic is similar to that of spe_float_min() above; the only
+ * difference is that the registers on spe_selb() have been reversed,
+ * so that the larger of the two is selected instead of the smaller.
+ */
+void 
+spe_float_max(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB)
+{
+   unsigned int compare_reg = spe_allocate_available_register(p);
+   spe_fcgt(p, compare_reg, rA, rB);
+   spe_selb(p, rT, rB, rA, compare_reg);
+   spe_release_register(p, compare_reg);
+}
 
 #endif /* GALLIUM_CELL */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index 2579045232..4ef05ea27d 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -322,6 +322,14 @@ spe_zero(struct spe_function *p, unsigned rT);
 extern void
 spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word);
 
+/** rT = float min(rA, rB) */
+extern void
+spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB);
+
+/** rT = float max(rA, rB) */
+extern void
+spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB);
+
 
 /* Floating-point instructions
  */
author	Robert Ellison <papillo@tungstengraphics.com>	2008-09-18 01:29:41 -0600
committer	Robert Ellison <papillo@tungstengraphics.com>	2008-09-18 01:29:41 -0600
commit	f8bba34d4e12ef4c620cac881a4b697a1e668377 (patch)
tree	5bc0c7927202e26b5566bdbd479f51ff573e4c37 /src/gallium/auxiliary/rtasm
parent	f631093ce76ad14dee63293761d7da7b7b42fc6d (diff)