CELL: fix colormask code generation

The colormask code generation had assumed that its input packed pixels were in RGBA format. In fact, the format they're in is dependent on the pipe color format. Now the color format is passed in to gen_colormask(), and proper color format-dependent SPU code is generated.
author: Robert Ellison <papillo@tungstengraphics.com> 2008-09-23 10:11:59 -0600
committer: Robert Ellison <papillo@tungstengraphics.com> 2008-09-23 10:11:59 -0600
commit: 1c79cf15c48e51cb5cf790f44214ae6aaf78c69b (patch)
tree: e4ff45d981ac1866b874456edcc2a8804f9bdc49
parent: 6901d6ef24224c27e20c3e864d035db1552aeeb8 (diff)
1 files changed, 78 insertions, 83 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index 1837b4c79b..3b166e446d 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -971,87 +971,6 @@ gen_logicop(const struct pipe_blend_state *blend,
 }
 
 
-static void
-gen_colormask(uint colormask,
-              struct spe_function *f,
-              int fragRGBA_reg, int fbRGBA_reg)
-{
-   /* We've got four 32-bit RGBA packed pixels in each of
-    * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
-    * reds, greens, blues, and alphas.
-    * */
-
-   /* The color mask operation can prevent any set of color
-    * components in the incoming fragment from being written to the frame 
-    * buffer; we do this by replacing the masked components of the 
-    * fragment with the frame buffer values.
-    *
-    * There are only 16 possibilities, with a unique mask for
-    * each of the possibilities.  (Technically, there are only 15
-    * possibilities, since we shouldn't be called for the one mask
-    * that does nothing, but the complete implementation is here
-    * anyway to avoid confusion.)
-    *
-    * We implement this via a constant static array which we'll index 
-    * into to get the correct mask.
-    * 
-    * We're dependent on the mask values being low-order bits,
-    * with particular values for each bit; so we start with a
-    * few assertions, which will fail if any of the values were
-    * to change.
-    */
-   ASSERT(PIPE_MASK_R == 0x1);
-   ASSERT(PIPE_MASK_G == 0x2);
-   ASSERT(PIPE_MASK_B == 0x4);
-   ASSERT(PIPE_MASK_A == 0x8);
-
-   /* Here's the list of all possible colormasks, indexed by the
-    * value of the combined mask specifier.
-    */
-   static const unsigned int colormasks[16] = {
-      0x00000000, /* 0: all colors masked */
-      0xff000000, /* 1: PIPE_MASK_R */
-      0x00ff0000, /* 2: PIPE_MASK_G */
-      0xffff0000, /* 3: PIPE_MASK_R | PIPE_MASK_G */
-      0x0000ff00, /* 4: PIPE_MASK_B */
-      0xff00ff00, /* 5: PIPE_MASK_R | PIPE_MASK_B */
-      0x00ffff00, /* 6: PIPE_MASK_G | PIPE_MASK_B */
-      0xffffff00, /* 7: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B */
-      0x000000ff, /* 8: PIPE_MASK_A */
-      0xff0000ff, /* 9: PIPE_MASK_R | PIPE_MASK_A */
-      0x00ff00ff, /* 10: PIPE_MASK_G | PIPE_MASK_A */
-      0xffff00ff, /* 11: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_A */
-      0x0000ffff, /* 12: PIPE_MASK_B | PIPE_MASK_A */
-      0xff00ffff, /* 13: PIPE_MASK_R | PIPE_MASK_B | PIPE_MASK_A */
-      0x00ffffff, /* 14: PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */
-      0xffffffff  /* 15: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */
-   };
-
-   /* Get a temporary register to hold the mask */
-   int colormask_reg = spe_allocate_available_register(f);
-
-   /* Look up the desired mask directly and load it into the mask register.
-    * This will load the same mask into each of the four words in the
-    * mask register.
-    */
-   spe_load_uint(f, colormask_reg, colormasks[colormask]);
-
-   /* Use the mask register to select between the fragment color
-    * values and the frame buffer color values.  Wherever the
-    * mask has a 0 bit, the current frame buffer color should override
-    * the fragment color.  Wherever the mask has a 1 bit, the 
-    * fragment color should persevere.  The Select Bits (selb rt, rA, rB, rM)
-    * instruction will select bits from its first operand rA wherever the
-    * the mask bits rM are 0, and from its second operand rB wherever the
-    * mask bits rM are 1.  That means that the frame buffer color is the
-    * first operand, and the fragment color the second.
-    */
-    spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg);
-
-    /* Release the temporary register and we're done */
-    spe_release_register(f, colormask_reg);
-}
-
 /**
  * Generate code to pack a quad of float colors into four 32-bit integers.
  *
@@ -1118,8 +1037,85 @@ gen_pack_colors(struct spe_function *f,
    spe_release_register(f, ba_reg);
 }
 
+static void
+gen_colormask(struct spe_function *f,
+              uint colormask,
+              enum pipe_format color_format,
+              int fragRGBA_reg, int fbRGBA_reg)
+{
+   /* We've got four 32-bit RGBA packed pixels in each of
+    * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
+    * reds, greens, blues, and alphas.  Further, the pixels
+    * are packed according to the given color format, not
+    * necessarily RGBA...
+    */
+   unsigned int r_mask;
+   unsigned int g_mask;
+   unsigned int b_mask;
+   unsigned int a_mask;
+
+   /* Calculate exactly where the bits for any particular color
+    * end up, so we can mask them correctly.
+    */
+   switch(color_format) {
+      case PIPE_FORMAT_A8R8G8B8_UNORM:
+         /* ARGB */
+         a_mask = 0xff000000;
+         r_mask = 0x00ff0000;
+         g_mask = 0x0000ff00;
+         b_mask = 0x000000ff;
+         break;
+      case PIPE_FORMAT_B8G8R8A8_UNORM:
+         /* BGRA */
+         b_mask = 0xff000000;
+         g_mask = 0x00ff0000;
+         r_mask = 0x0000ff00;
+         a_mask = 0x000000ff;
+         break;
+      default:
+         ASSERT(0);
+   }
 
+   /* For each R, G, B, and A component we're supposed to mask out, 
+    * clear its bits.   Then our mask operation later will work 
+    * as expected.
+    */
+   if (!(colormask & PIPE_MASK_R)) {
+      r_mask = 0;
+   }
+   if (!(colormask & PIPE_MASK_G)) {
+      g_mask = 0;
+   }
+   if (!(colormask & PIPE_MASK_B)) {
+      b_mask = 0;
+   }
+   if (!(colormask & PIPE_MASK_A)) {
+      a_mask = 0;
+   }
+
+   /* Get a temporary register to hold the mask that will be applied to the fragment */
+   int colormask_reg = spe_allocate_available_register(f);
 
+   /* The actual mask we're going to use is an OR of the remaining R, G, B, and A
+    * masks.  Load the result value into our temporary register.
+    */
+   spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask);
+
+   /* Use the mask register to select between the fragment color
+    * values and the frame buffer color values.  Wherever the
+    * mask has a 0 bit, the current frame buffer color should override
+    * the fragment color.  Wherever the mask has a 1 bit, the 
+    * fragment color should persevere.  The Select Bits (selb rt, rA, rB, rM)
+    * instruction will select bits from its first operand rA wherever the
+    * the mask bits rM are 0, and from its second operand rB wherever the
+    * mask bits rM are 1.  That means that the frame buffer color is the
+    * first operand, and the fragment color the second.
+    */
+    spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg);
+
+    /* Release the temporary register and we're done */
+    spe_release_register(f, colormask_reg);
+}
 
 /**
  * Generate SPE code to implement the fragment operations (alpha test,
@@ -1383,7 +1379,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
       }
 
       if (blend->colormask != PIPE_MASK_RGBA) {
-         gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
+         gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg);
       }
 
 
@@ -1407,7 +1403,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
 
    spe_bi(f, SPE_REG_RA, 0, 0);  /* return from function call */
 
-
    spe_release_register(f, fbRGBA_reg);
    spe_release_register(f, fbZS_reg);
    spe_release_register(f, quad_offset_reg);
author	Robert Ellison <papillo@tungstengraphics.com>	2008-09-23 10:11:59 -0600
committer	Robert Ellison <papillo@tungstengraphics.com>	2008-09-23 10:11:59 -0600
commit	1c79cf15c48e51cb5cf790f44214ae6aaf78c69b (patch)
tree	e4ff45d981ac1866b874456edcc2a8804f9bdc49
parent	6901d6ef24224c27e20c3e864d035db1552aeeb8 (diff)