From f8bba34d4e12ef4c620cac881a4b697a1e668377 Mon Sep 17 00:00:00 2001
From: Robert Ellison <papillo@tungstengraphics.com>
Date: Thu, 18 Sep 2008 01:29:41 -0600
Subject: CELL: finish fragment ops blending (except for unusual D3D modes)

- Added new "macro" functions spe_float_min() and spe_float_max()
  to rtasm_ppc_spe.{ch}.  These emit instructions that cause
  the minimum or maximum of each element in a vector of floats
  to be saved in the destination register.

- Major changes to cell_gen_fragment.c to implement all the blending
  modes (except for the mysterious D3D-based PIPE_BLENDFACTOR_SRC1_COLOR,
  PIPE_BLENDFACTOR_SRC1_ALPHA, PIPE_BLENDFACTOR_INV_SRC1_COLOR, and
  PIPE_BLENDFACTOR_INV_SRC1_ALPHA).

- Some revamping of code in cell_gen_fragment.c: use the new spe_float_min()
  and spe_float_max() functions (instead of expanding these calculations
  inline via macros); create and use an inline utility function for handling
  "optional" register allocation (for the {1,1,1,1} vector, and the
  blend color vectors) instead of expanding with macros; use the Floating
  Negative Multiply and Subtract (fnms) instruction to simplify and optimize
  many blending calculations.
---
 src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 41 ++++++++++++++++++++++++++++-
 src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h |  8 ++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index 870ae802c5..12e0826fb9 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -384,7 +384,7 @@ void spe_release_func(struct spe_function *p)
 
 
 /**
- * Alloate a SPE register.
+ * Allocate a SPE register.
  * \return register index or -1 if none left.
  */
 int spe_allocate_available_register(struct spe_function *p)
@@ -646,5 +646,44 @@ spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word)
    }
 }
 
+/* For each 32-bit float element of rA and rB, choose the smaller of the
+ * two, compositing them into the rT register.
+ * 
+ * The Float Compare Greater Than (fcgt) instruction will put 1s into
+ * compare_reg where rA > rB, and 0s where rA <= rB.
+ *
+ * Then the Select Bits (selb) instruction will take bits from rA where
+ * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA
+ * where rA <= rB and from rB where rA > rB, which is exactly the
+ * "min" operation.
+ *
+ * The compare_reg could in many cases be the same as rT, unless
+ * rT == rA || rT == rB.  But since this is common in constructions
+ * like "x = min(x, a)", we always allocate a new register to be safe.
+ */
+void
+spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB)
+{
+   const unsigned int gt_mask = spe_allocate_available_register(p); /* NOTE(review): -1 (none left) is not checked */
+   spe_fcgt(p, gt_mask, rA, rB);      /* gt_mask: 1s where rA > rB, 0s elsewhere */
+   spe_selb(p, rT, rA, rB, gt_mask);  /* mask 0 -> bits of rA, mask 1 -> bits of rB */
+   spe_release_register(p, gt_mask);  /* scratch register no longer needed */
+}
+
+/* For each 32-bit float element of rA and rB, choose the greater of the
+ * two, compositing them into the rT register.
+ * 
+ * The logic is similar to that of spe_float_min() above; the only
+ * difference is that the registers on spe_selb() have been reversed,
+ * so that the larger of the two is selected instead of the smaller.
+ */
+void
+spe_float_max(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB)
+{
+   const unsigned int gt_mask = spe_allocate_available_register(p); /* NOTE(review): -1 (none left) is not checked */
+   spe_fcgt(p, gt_mask, rA, rB);      /* gt_mask: 1s where rA > rB, 0s elsewhere */
+   spe_selb(p, rT, rB, rA, gt_mask);  /* mask 0 -> bits of rB, mask 1 -> bits of rA */
+   spe_release_register(p, gt_mask);  /* scratch register no longer needed */
+}
 
 #endif /* GALLIUM_CELL */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index 2579045232..4ef05ea27d 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -322,6 +322,14 @@ spe_zero(struct spe_function *p, unsigned rT);
 extern void
 spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word);
 
+/** rT = float min(rA, rB), taken per 32-bit float element of the vectors */
+extern void
+spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB);
+
+/** rT = float max(rA, rB), taken per 32-bit float element of the vectors */
+extern void
+spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB);
+
 
 /* Floating-point instructions
  */
-- 
cgit v1.2.3