diff options
| author | Robert Ellison <papillo@tungstengraphics.com> | 2008-09-17 02:30:20 -0600 | 
|---|---|---|
| committer | Robert Ellison <papillo@tungstengraphics.com> | 2008-09-17 02:32:43 -0600 | 
| commit | 858ced051551aa5d0ddd41936253d3a4ee5c142f (patch) | |
| tree | 1ea8b4b05a09cc9d63be5491cef6c19b8c49efdd | |
| parent | a3a797ffa84975330d5632ce7a71c65c9c2ad0d8 (diff) | |
CELL: fleshing out the blending fragment ops
- Added two new debug flags (to be used with the CELL_DEBUG environment
  variable).  The first, "CELL_DEBUG=fragops", activates SPE fragment
  ops debug messages.  The second, "CELL_DEBUG=fragopfallback", will
  eventually be used to disable the use of generated SPE code for
  fragment ops in favor of the default fallback reference routine.
  (During development, though, the parity of this flag is reversed:
  all users will get the reference code *unless* CELL_DEBUG=fragopfallback
  is set.  This will prevent hiccups in code generation from affecting
  the other developers.)
- Formalized debug message usage and macros in spu/spu_main.c.
- Added lots of new code to ppu/cell_gen_fragment.c to extend the
  number of supported source RGB factors from 4 to 15, and to
  complete the list of supported blend equations.
More coming, to complete the source and destination RGB and alpha
factors, and to complete the rest of the fragment operations...
| -rw-r--r-- | src/gallium/drivers/cell/common.h | 11 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_context.c | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 272 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_state_emit.c | 5 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.c | 115 | 
5 files changed, 337 insertions, 68 deletions
| diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 8f08854117..f0ff96eb47 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -104,12 +104,11 @@  #define CELL_BUFFER_STATUS_FREE 10  #define CELL_BUFFER_STATUS_USED 20 - -#define CELL_DEBUG_CHECKER  (1 << 0) -#define CELL_DEBUG_ASM      (1 << 1) -#define CELL_DEBUG_SYNC     (1 << 2) - - +#define CELL_DEBUG_CHECKER              (1 << 0) +#define CELL_DEBUG_ASM                  (1 << 1) +#define CELL_DEBUG_SYNC                 (1 << 2) +#define CELL_DEBUG_FRAGMENT_OPS         (1 << 3) +#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)  /** Max instructions for doing per-fragment operations */  #define SPU_MAX_FRAGMENT_OPS_INSTS 64 diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index b418271dca..62e213ea35 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -89,6 +89,8 @@ static const struct debug_named_value cell_debug_flags[] = {     {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */     {"asm", CELL_DEBUG_ASM},        /**< dump SPU asm code */     {"sync", CELL_DEBUG_SYNC},      /**< SPUs do synchronous DMA */ +   {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ +   {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/     {NULL, 0}  }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 06219d4e98..2c8c9e0d2c 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -229,7 +229,36 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,     spe_release_register(f, amask_reg);  } +/* This is a convenient and oft-used sequence.  It chooses + * the smaller of each element of reg1 and reg2, and combines them + * into the result register, as follows: + *  + * The Float Compare Greater Than (fcgt) instruction will put + * 1s into compare_reg where reg1 > reg2, and 0s where reg1 <= reg2. + * + * Then the Select Bits (selb) instruction will take bits from + * reg1 where compare_reg is 0, and from reg2 where compare_reg is + * 1.  Ergo, result_reg will have the bits from reg1 where reg1 <= reg2, + * and the bits from reg2 where reg1 > reg2, which is exactly the + * MIN operation. + */ +#define FLOAT_VECTOR_MIN(f, result_reg, reg1, reg2) {\ +   int compare_reg = spe_allocate_available_register(f); \ +   spe_fcgt(f, compare_reg, reg1, reg2); \ +   spe_selb(f, result_reg, reg1, reg2, compare_reg); \ +   spe_release_register(f, compare_reg); \ +} +/* The FLOAT_VECTOR_MAX sequence is similar to the FLOAT_VECTOR_MIN  + * sequence above, except that the registers specified when selecting + * bits are reversed. + */ +#define FLOAT_VECTOR_MAX(f, result_reg, reg1, reg2) {\ +   int compare_reg = spe_allocate_available_register(f); \ +   spe_fcgt(f, compare_reg, reg1, reg2); \ +   spe_selb(f, result_reg, reg2, reg1, compare_reg); \ +   spe_release_register(f, compare_reg); \ +}  /**   * Generate SPE code to implement the given blend mode for a quad of pixels. @@ -242,6 +271,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,   */  static void  gen_blend(const struct pipe_blend_state *blend, +          const struct pipe_blend_color *blend_color,            struct spe_function *f,            enum pipe_format color_format,            int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, @@ -262,10 +292,53 @@ gen_blend(const struct pipe_blend_state *blend,     int fbB_reg = spe_allocate_available_register(f);     int fbA_reg = spe_allocate_available_register(f); -   int one_reg = spe_allocate_available_register(f);     int tmp_reg = spe_allocate_available_register(f); -   boolean one_reg_set = false; /* avoid setting one_reg more than once */ +   /* These values might or might not eventually get put into +    * registers.  We avoid allocating them and setting them until +    * they're actually needed; then we avoid setting them more than +    * once, and release them at the end of code generation. +    */ +   boolean one_reg_set = false;  +   int one_reg; +#define SET_ONE_REG_IF_UNSET(f) if (!one_reg_set) {\ +   one_reg = spe_allocate_available_register(f); \ +   spe_load_float(f, one_reg, 1.0f); \ +   one_reg_set = true; \ +} +#define RELEASE_ONE_REG_IF_USED(f) if (one_reg_set) {\ +   spe_release_register(f, one_reg); \ +} +   +   boolean const_color_set = false; +   int constR_reg, constG_reg, constB_reg; +#define SET_CONST_COLOR_IF_UNSET(f, blend_color) if (!const_color_set) {\ +   constR_reg = spe_allocate_available_register(f); \ +   constG_reg = spe_allocate_available_register(f); \ +   constG_reg = spe_allocate_available_register(f); \ +   spe_load_float(f, constR_reg, blend_color->color[0]); \ +   spe_load_float(f, constG_reg, blend_color->color[1]); \ +   spe_load_float(f, constB_reg, blend_color->color[2]); \ +   const_color_set = true;\ +} +#define RELEASE_CONST_COLOR_IF_USED(f) if (const_color_set) {\ +   spe_release_register(f, constR_reg); \ +   spe_release_register(f, constG_reg); \ +   spe_release_register(f, constB_reg); \ +} + +   boolean const_alpha_set = false; +   int constA_reg; +#define SET_CONST_ALPHA_IF_UNSET(f, blend_color) if (!const_alpha_set) {\ +   constA_reg = spe_allocate_available_register(f); \ +   spe_load_float(f, constA_reg, blend_color->color[3]); \ +   const_alpha_set = true; \ +} +#define RELEASE_CONST_ALPHA_IF_USED(f) if (const_alpha_set) {\ +   spe_release_register(f, constA_reg); \ +} + +   /* Real code starts here */     ASSERT(blend->blend_enable); @@ -348,30 +421,161 @@ gen_blend(const struct pipe_blend_state *blend,     /* -    * Compute Src RGB terms +    * Compute Src RGB terms.  We're actually looking for the value +    * of (the appropriate RGB factors) * (the incoming source RGB color).      */     switch (blend->rgb_src_factor) {     case PIPE_BLENDFACTOR_ONE: +      /* factors = (1,1,1), so term = (R,G,B) */        spe_move(f, term1R_reg, fragR_reg);        spe_move(f, term1G_reg, fragG_reg);        spe_move(f, term1B_reg, fragB_reg);        break;     case PIPE_BLENDFACTOR_ZERO: -      spe_zero(f, term1R_reg); -      spe_zero(f, term1G_reg); -      spe_zero(f, term1B_reg); +      /* factors = (0,0,0), so term = (0,0,0) */ +      spe_load_float(f, term1R_reg, 0.0f); +      spe_load_float(f, term1G_reg, 0.0f); +      spe_load_float(f, term1B_reg, 0.0f);        break;     case PIPE_BLENDFACTOR_SRC_COLOR: +      /* factors = (R,G,B), so term = (R*R, G*G, B*B) */        spe_fm(f, term1R_reg, fragR_reg, fragR_reg);        spe_fm(f, term1G_reg, fragG_reg, fragG_reg);        spe_fm(f, term1B_reg, fragB_reg, fragB_reg);        break;     case PIPE_BLENDFACTOR_SRC_ALPHA: +      /* factors = (A,A,A), so term = (R*A, G*A, B*A) */        spe_fm(f, term1R_reg, fragR_reg, fragA_reg);        spe_fm(f, term1G_reg, fragG_reg, fragA_reg);        spe_fm(f, term1B_reg, fragB_reg, fragA_reg);        break; -      /* XXX more cases */ +   case PIPE_BLENDFACTOR_INV_SRC_COLOR: +      /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) */ +      /* we'll need the optional constant {1,1,1,1} register */ +      SET_ONE_REG_IF_UNSET(f) +      /* tmp = 1 - R */ +      spe_fs(f, tmp_reg, one_reg, fragR_reg); +      /* term = R * tmp */ +      spe_fm(f, term1R_reg, fragR_reg, tmp_reg); +      /* repeat for G and B */ +      spe_fs(f, tmp_reg, one_reg, fragG_reg); +      spe_fm(f, term1G_reg, fragG_reg, tmp_reg); +      spe_fs(f, tmp_reg, one_reg, fragB_reg); +      spe_fm(f, term1B_reg, fragB_reg, tmp_reg); +      break; +   case PIPE_BLENDFACTOR_DST_COLOR: +      /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ +      spe_fm(f, term1R_reg, fragR_reg, fbR_reg); +      spe_fm(f, term1G_reg, fragG_reg, fbG_reg); +      spe_fm(f, term1B_reg, fragB_reg, fbB_reg); +      break; +   case PIPE_BLENDFACTOR_INV_DST_COLOR: +      /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) */ +      /* we'll need the optional constant {1,1,1,1} register */ +      SET_ONE_REG_IF_UNSET(f) +      /* tmp = 1 - Rfb */ +      spe_fs(f, tmp_reg, one_reg, fbR_reg); +      /* term = R * tmp */ +      spe_fm(f, term1R_reg, fragR_reg, tmp_reg); +      /* repeat for G and B */ +      spe_fs(f, tmp_reg, one_reg, fbG_reg); +      spe_fm(f, term1G_reg, fragG_reg, tmp_reg); +      spe_fs(f, tmp_reg, one_reg, fbB_reg); +      spe_fm(f, term1B_reg, fragB_reg, tmp_reg); +      break; +   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +      /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) */ +      /* we'll need the optional constant {1,1,1,1} register */ +      SET_ONE_REG_IF_UNSET(f) +      /* tmp = 1 - A */ +      spe_fs(f, tmp_reg, one_reg, fragA_reg); +      /* term = R * tmp */ +      spe_fm(f, term1R_reg, fragR_reg, tmp_reg); +      /* repeat for G and B with the same (1-A) factor */ +      spe_fm(f, term1G_reg, fragG_reg, tmp_reg); +      spe_fm(f, term1B_reg, fragB_reg, tmp_reg); +      break; +   case PIPE_BLENDFACTOR_DST_ALPHA: +      /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ +      spe_fm(f, term1R_reg, fragR_reg, fbA_reg); +      spe_fm(f, term1G_reg, fragG_reg, fbA_reg); +      spe_fm(f, term1B_reg, fragB_reg, fbA_reg); +      break; +   case PIPE_BLENDFACTOR_INV_DST_ALPHA: +      /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) */ +      /* we'll need the optional constant {1,1,1,1} register */ +      SET_ONE_REG_IF_UNSET(f) +      /* tmp = 1 - A */ +      spe_fs(f, tmp_reg, one_reg, fbA_reg); +      /* term = R * tmp, G*tmp, and B*tmp */ +      spe_fm(f, term1R_reg, fragR_reg, tmp_reg); +      spe_fm(f, term1G_reg, fragG_reg, tmp_reg); +      spe_fm(f, term1B_reg, fragB_reg, tmp_reg); +      break; +   case PIPE_BLENDFACTOR_CONST_COLOR: +      /* We'll need the optional blend color registers */ +      SET_CONST_COLOR_IF_UNSET(f,blend_color) +      /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ +      spe_fm(f, term1R_reg, fragR_reg, constR_reg); +      spe_fm(f, term1G_reg, fragG_reg, constG_reg); +      spe_fm(f, term1B_reg, fragB_reg, constB_reg); +      break; +   case PIPE_BLENDFACTOR_CONST_ALPHA: +      /* we'll need the optional constant alpha register */ +      SET_CONST_ALPHA_IF_UNSET(f, blend_color) +      /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ +      spe_fm(f, term1R_reg, fragR_reg, constA_reg); +      spe_fm(f, term1G_reg, fragG_reg, constA_reg); +      spe_fm(f, term1B_reg, fragB_reg, constA_reg); +      break; +   case PIPE_BLENDFACTOR_INV_CONST_COLOR: +      /* We need both the optional {1,1,1,1} register, and the optional +       * constant color registers +       */ +      SET_ONE_REG_IF_UNSET(f) +      SET_CONST_COLOR_IF_UNSET(f, blend_color) +      /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) */ +      spe_fs(f, tmp_reg, one_reg, constR_reg); +      spe_fm(f, term1R_reg, fragR_reg, tmp_reg); +      spe_fs(f, tmp_reg, one_reg, constG_reg); +      spe_fm(f, term1G_reg, fragG_reg, tmp_reg); +      spe_fs(f, tmp_reg, one_reg, constB_reg); +      spe_fm(f, term1B_reg, fragB_reg, tmp_reg); +      break; +   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +      /* We need the optional {1,1,1,1} register and the optional  +       * constant alpha register +       */ +      SET_ONE_REG_IF_UNSET(f) +      SET_CONST_ALPHA_IF_UNSET(f, blend_color) +      /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) */ +      spe_fs(f, tmp_reg, one_reg, constA_reg); +      spe_fm(f, term1R_reg, fragR_reg, tmp_reg); +      spe_fm(f, term1G_reg, fragG_reg, tmp_reg); +      spe_fm(f, term1B_reg, fragB_reg, tmp_reg); +      break; +   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +      /* We'll need the optional {1,1,1,1} register */ +      SET_ONE_REG_IF_UNSET(f) +      /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so  +       * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) +       */ +      /* tmp = 1 - Afb */ +      spe_fs(f, tmp_reg, one_reg, fbA_reg); +      /* tmp = min(A,tmp) */ +      FLOAT_VECTOR_MIN(f, tmp_reg, fragA_reg, tmp_reg) +      /* term = R*tmp */ +      spe_fm(f, term1R_reg, fragR_reg, tmp_reg); +      spe_fm(f, term1G_reg, fragG_reg, tmp_reg); +      spe_fm(f, term1B_reg, fragB_reg, tmp_reg); +      break; + +      /* non-OpenGL cases? */ +   case PIPE_BLENDFACTOR_SRC1_COLOR: +   case PIPE_BLENDFACTOR_SRC1_ALPHA: +   case PIPE_BLENDFACTOR_INV_SRC1_COLOR: +   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: +     default:        ASSERT(0);     } @@ -421,6 +625,7 @@ gen_blend(const struct pipe_blend_state *blend,     case PIPE_BLENDFACTOR_INV_SRC_ALPHA:        /* one = {1.0, 1.0, 1.0, 1.0} */        if (!one_reg_set) { +         one_reg = spe_allocate_available_register(f);           spe_load_float(f, one_reg, 1.0f);           one_reg_set = true;        } @@ -432,6 +637,14 @@ gen_blend(const struct pipe_blend_state *blend,        spe_fm(f, term2B_reg, fbB_reg, tmp_reg);        break;        /* XXX more cases */ +      // GL_ONE_MINUS_SRC_COLOR +      // GL_DST_COLOR +      // GL_ONE_MINUS_DST_COLOR +      // GL_DST_ALPHA +      // GL_CONSTANT_COLOR +      // GL_ONE_MINUS_CONSTANT_COLOR +      // GL_CONSTANT_ALPHA +      // GL_ONE_MINUS_CONSTANT_ALPHA     default:        ASSERT(0);     } @@ -452,6 +665,7 @@ gen_blend(const struct pipe_blend_state *blend,     case PIPE_BLENDFACTOR_INV_SRC_ALPHA:        /* one = {1.0, 1.0, 1.0, 1.0} */        if (!one_reg_set) { +         one_reg = spe_allocate_available_register(f);           spe_load_float(f, one_reg, 1.0f);           one_reg_set = true;        } @@ -461,6 +675,14 @@ gen_blend(const struct pipe_blend_state *blend,        spe_fm(f, term2A_reg, fbA_reg, tmp_reg);        break;        /* XXX more cases */ +      // GL_ONE_MINUS_SRC_COLOR +      // GL_DST_COLOR +      // GL_ONE_MINUS_DST_COLOR +      // GL_DST_ALPHA +      // GL_CONSTANT_COLOR +      // GL_ONE_MINUS_CONSTANT_COLOR +      // GL_CONSTANT_ALPHA +      // GL_ONE_MINUS_CONSTANT_ALPHA     default:        ASSERT(0);     } @@ -479,7 +701,21 @@ gen_blend(const struct pipe_blend_state *blend,        spe_fs(f, fragG_reg, term1G_reg, term2G_reg);        spe_fs(f, fragB_reg, term1B_reg, term2B_reg);        break; -      /* XXX more cases */ +   case PIPE_BLEND_REVERSE_SUBTRACT: +      spe_fs(f, fragR_reg, term2R_reg, term1R_reg); +      spe_fs(f, fragG_reg, term2G_reg, term1G_reg); +      spe_fs(f, fragB_reg, term2B_reg, term1B_reg); +      break; +   case PIPE_BLEND_MIN: +      FLOAT_VECTOR_MIN(f, fragR_reg, term1R_reg, term2R_reg) +      FLOAT_VECTOR_MIN(f, fragG_reg, term1G_reg, term2G_reg) +      FLOAT_VECTOR_MIN(f, fragB_reg, term1B_reg, term2B_reg) +      break; +   case PIPE_BLEND_MAX: +      FLOAT_VECTOR_MAX(f, fragR_reg, term1R_reg, term2R_reg) +      FLOAT_VECTOR_MAX(f, fragG_reg, term1G_reg, term2G_reg) +      FLOAT_VECTOR_MAX(f, fragB_reg, term1B_reg, term2B_reg) +      break;     default:        ASSERT(0);     } @@ -494,7 +730,15 @@ gen_blend(const struct pipe_blend_state *blend,     case PIPE_BLEND_SUBTRACT:        spe_fs(f, fragA_reg, term1A_reg, term2A_reg);        break; -      /* XXX more cases */ +   case PIPE_BLEND_REVERSE_SUBTRACT: +      spe_fs(f, fragA_reg, term2A_reg, term1A_reg); +      break; +   case PIPE_BLEND_MIN: +      FLOAT_VECTOR_MIN(f, fragA_reg, term1A_reg, term2A_reg) +      break; +   case PIPE_BLEND_MAX: +      FLOAT_VECTOR_MAX(f, fragA_reg, term1A_reg, term2A_reg) +      break;     default:        ASSERT(0);     } @@ -514,8 +758,12 @@ gen_blend(const struct pipe_blend_state *blend,     spe_release_register(f, fbB_reg);     spe_release_register(f, fbA_reg); -   spe_release_register(f, one_reg);     spe_release_register(f, tmp_reg); + +   /* Free any optional registers that actually got used */ +   RELEASE_ONE_REG_IF_USED(f) +   RELEASE_CONST_COLOR_IF_USED(f) +   RELEASE_CONST_ALPHA_IF_USED(f)  } @@ -629,6 +877,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)     const struct pipe_depth_stencil_alpha_state *dsa =        &cell->depth_stencil->base;     const struct pipe_blend_state *blend = &cell->blend->base; +   const struct pipe_blend_color *blend_color = &cell->blend_color;     const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;     /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ @@ -651,7 +900,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)     int fbRGBA_reg;  /**< framebuffer's RGBA colors for quad */     int fbZS_reg;    /**< framebuffer's combined z/stencil values for quad */ -   spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);     spe_allocate_register(f, x_reg);     spe_allocate_register(f, y_reg);     spe_allocate_register(f, color_tile_reg); @@ -816,7 +1064,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)     if (blend->blend_enable) { -      gen_blend(blend, f, color_format, +      gen_blend(blend, blend_color, f, color_format,                  fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);     } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 2da3097983..8a389cd6aa 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -100,14 +100,19 @@ cell_emit_state(struct cell_context *cell)              = cell_batch_alloc(cell, sizeof(*fops));        struct spe_function spe_code; +      /* Prepare the buffer that will hold the generated code. */ +      spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); +        /* generate new code */        cell_gen_fragment_function(cell, &spe_code); +        /* put the new code into the batch buffer */        fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;        memcpy(&fops->code, spe_code.store,               SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);        fops->dsa = cell->depth_stencil->base;        fops->blend = cell->blend->base; +        /* free codegen buffer */        spe_release_func(&spe_code);     } diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 78260c4259..da2cb08972 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -50,7 +50,31 @@ helpful headers:  /opt/cell/sdk/usr/include/libmisc.h  */ +/* Set to 0 to disable all extraneous debugging code */ +#define DEBUG 1 + +#if DEBUG  boolean Debug = FALSE; +boolean force_fragment_ops_fallback = TRUE; + +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define DEBUG_PRINTF(format,...) \ +   if (Debug) \ +      printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#define D_PRINTF(flag, format,...) \ +   if (spu.init.debug_flags & (flag)) \ +      printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) + +#else + +#define DEBUG_PRINTF(...) +#define D_PRINTF(...) + +#endif  struct spu_global spu; @@ -133,9 +157,7 @@ really_clear_tiles(uint surfaceIndex)  static void  cmd_clear_surface(const struct cell_command_clear_surface *clear)  { -   if (Debug) -      printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, -             clear->surface, clear->value); +   DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);     if (clear->surface == 0) {        spu.fb.color_clear_value = clear->value; @@ -203,17 +225,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear)  #endif /* CLEAR_OPT */ -   if (Debug) -      printf("SPU %u: CLEAR SURF done\n", spu.init.id); +   DEBUG_PRINTF("CLEAR SURF done\n");  }  static void  cmd_release_verts(const struct cell_command_release_verts *release)  { -   if (Debug) -      printf("SPU %u: RELEASE VERTS %u\n", -             spu.init.id, release->vertex_buf); +   DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf);     ASSERT(release->vertex_buf != ~0U);     release_buffer(release->vertex_buf);  } @@ -228,16 +247,30 @@ cmd_release_verts(const struct cell_command_release_verts *release)  static void  cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)  { -   if (Debug) -      printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); +   DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n");     /* Copy SPU code from batch buffer to spu buffer */     memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);     /* Copy state info (for fallback case only) */     memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));     memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); -   /* Point function pointer at new code */ -   spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; +   /* Parity twist!  For now, always use the fallback code by default, +    * only switching to codegen when specifically requested.  This +    * allows us to develop freely without risking taking down the +    * branch. +    * +    * Later, the parity of this check will be reversed, so that +    * codegen is *always* used, unless we specifically indicate that +    * we don't want it. +    * +    * Eventually, the option will be removed completely, because in +    * final code we'll always use codegen and won't even provide the +    * raw state records that the fallback code requires. +    */ +   if (spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) { +      spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; +   } +   /* otherwise, the default fallback code remains in place */     spu.read_depth = spu.depth_stencil_alpha.depth.enabled;     spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; @@ -247,8 +280,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)  static void  cmd_state_fragment_program(const struct cell_command_fragment_program *fp)  { -   if (Debug) -      printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id); +   DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n");     /* Copy SPU code from batch buffer to spu buffer */     memcpy(spu.fragment_program_code, fp->code,            SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); @@ -262,9 +294,7 @@ cmd_state_fragment_program(const struct cell_command_fragment_program *fp)  static void  cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)  { -   if (Debug) -      printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x  zformat 0x%x\n", -             spu.init.id, +   DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x  zformat 0x%x\n",               cmd->width,               cmd->height,               cmd->color_start, @@ -309,9 +339,7 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)  static void  cmd_state_sampler(const struct cell_command_sampler *sampler)  { -   if (Debug) -      printf("SPU %u: SAMPLER [%u]\n", -             spu.init.id, sampler->unit); +   DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit);     spu.sampler[sampler->unit] = sampler->state;     if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) @@ -328,11 +356,9 @@ cmd_state_texture(const struct cell_command_texture *texture)     const uint width = texture->width;     const uint height = texture->height; -   if (Debug) { -      printf("SPU %u: TEXTURE [%u] at %p  size %u x %u\n", spu.init.id, +   DEBUG_PRINTF("TEXTURE [%u] at %p  size %u x %u\n",               texture->unit, texture->start,               texture->width, texture->height); -   }     spu.texture[unit].start = texture->start;     spu.texture[unit].width = width; @@ -351,10 +377,7 @@ cmd_state_texture(const struct cell_command_texture *texture)  static void  cmd_state_vertex_info(const struct vertex_info *vinfo)  { -   if (Debug) { -      printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, -             vinfo->num_attribs); -   } +   DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);     ASSERT(vinfo->num_attribs >= 1);     ASSERT(vinfo->num_attribs <= 8);     memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); @@ -393,8 +416,7 @@ cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)  static void  cmd_finish(void)  { -   if (Debug) -      printf("SPU %u: FINISH\n", spu.init.id); +   DEBUG_PRINTF("FINISH\n");     really_clear_tiles(0);     /* wait for all outstanding DMAs to finish */     mfc_write_tag_mask(~0); @@ -419,9 +441,8 @@ cmd_batch(uint opcode)     const unsigned usize = size / sizeof(buffer[0]);     uint pos; -   if (Debug) -      printf("SPU %u: BATCH buffer %u, len %u, from %p\n", -             spu.init.id, buf, size, spu.init.buffers[buf]); +   DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", +             buf, size, spu.init.buffers[buf]);     ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); @@ -440,8 +461,7 @@ cmd_batch(uint opcode)     wait_on_mask(1 << TAG_BATCH_BUFFER);     /* Tell PPU we're done copying the buffer to local store */ -   if (Debug) -      printf("SPU %u: release batch buf %u\n", spu.init.id, buf); +   DEBUG_PRINTF("release batch buf %u\n", buf);     release_buffer(buf);     /* @@ -571,8 +591,7 @@ cmd_batch(uint opcode)        }     } -   if (Debug) -      printf("SPU %u: BATCH complete\n", spu.init.id); +   DEBUG_PRINTF("BATCH complete\n");  } @@ -585,8 +604,7 @@ main_loop(void)     struct cell_command cmd;     int exitFlag = 0; -   if (Debug) -      printf("SPU %u: Enter main loop\n", spu.init.id); +   DEBUG_PRINTF("Enter main loop\n");     ASSERT((sizeof(struct cell_command) & 0xf) == 0);     ASSERT_ALIGN16(&cmd); @@ -595,14 +613,12 @@ main_loop(void)        unsigned opcode;        int tag = 0; -      if (Debug) -         printf("SPU %u: Wait for cmd...\n", spu.init.id); +      DEBUG_PRINTF("Wait for cmd...\n");        /* read/wait from mailbox */        opcode = (unsigned int) spu_read_in_mbox(); -      if (Debug) -         printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode); +      DEBUG_PRINTF("got cmd 0x%x\n", opcode);        /* command payload */        mfc_get(&cmd,  /* dest */ @@ -619,8 +635,7 @@ main_loop(void)        switch (opcode & CELL_CMD_OPCODE_MASK) {        case CELL_CMD_EXIT: -         if (Debug) -            printf("SPU %u: EXIT\n", spu.init.id); +         DEBUG_PRINTF("EXIT\n");           exitFlag = 1;           break;        case CELL_CMD_VS_EXECUTE: @@ -632,13 +647,12 @@ main_loop(void)           cmd_batch(opcode);           break;        default: -         printf("Bad opcode!\n"); +         printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);        }     } -   if (Debug) -      printf("SPU %u: Exit main loop\n", spu.init.id); +   DEBUG_PRINTF("Exit main loop\n");     spu_dcache_report();  } @@ -653,7 +667,8 @@ one_time_init(void)     invalidate_tex_cache();     /* Install default/fallback fragment processing function. -    * This will normally be overriden by a code-gen'd function. +    * This will normally be overriden by a code-gen'd function +    * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.      */     spu.fragment_ops = spu_fallback_fragment_ops;  } @@ -685,8 +700,8 @@ main(main_param_t speid, main_param_t argp)     one_time_init(); -   if (Debug) -      printf("SPU: main() speid=%lu\n", (unsigned long) speid); +   DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid); +   D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");     mfc_get(&spu.init,  /* dest */             (unsigned int) argp, /* src */ | 
