diff options
Diffstat (limited to 'src/gallium/drivers/cell/ppu/cell_gen_fragment.c')
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 272 |
1 files changed, 260 insertions, 12 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 06219d4e98..2c8c9e0d2c 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -229,7 +229,36 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, spe_release_register(f, amask_reg); } +/* This is a convenient and oft-used sequence. It chooses + * the smaller of each element of reg1 and reg2, and combines them + * into the result register, as follows: + * + * The Float Compare Greater Than (fcgt) instruction will put + * 1s into compare_reg where reg1 > reg2, and 0s where reg1 <= reg2. + * + * Then the Select Bits (selb) instruction will take bits from + * reg1 where compare_reg is 0, and from reg2 where compare_reg is + * 1. Ergo, result_reg will have the bits from reg1 where reg1 <= reg2, + * and the bits from reg2 where reg1 > reg2, which is exactly the + * MIN operation. + */ +#define FLOAT_VECTOR_MIN(f, result_reg, reg1, reg2) {\ + int compare_reg = spe_allocate_available_register(f); \ + spe_fcgt(f, compare_reg, reg1, reg2); \ + spe_selb(f, result_reg, reg1, reg2, compare_reg); \ + spe_release_register(f, compare_reg); \ +} +/* The FLOAT_VECTOR_MAX sequence is similar to the FLOAT_VECTOR_MIN + * sequence above, except that the registers specified when selecting + * bits are reversed. + */ +#define FLOAT_VECTOR_MAX(f, result_reg, reg1, reg2) {\ + int compare_reg = spe_allocate_available_register(f); \ + spe_fcgt(f, compare_reg, reg1, reg2); \ + spe_selb(f, result_reg, reg2, reg1, compare_reg); \ + spe_release_register(f, compare_reg); \ +} /** * Generate SPE code to implement the given blend mode for a quad of pixels. @@ -242,6 +271,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, */ static void gen_blend(const struct pipe_blend_state *blend, + const struct pipe_blend_color *blend_color, struct spe_function *f, enum pipe_format color_format, int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, @@ -262,10 +292,53 @@ gen_blend(const struct pipe_blend_state *blend, int fbB_reg = spe_allocate_available_register(f); int fbA_reg = spe_allocate_available_register(f); - int one_reg = spe_allocate_available_register(f); int tmp_reg = spe_allocate_available_register(f); - boolean one_reg_set = false; /* avoid setting one_reg more than once */ + /* These values might or might not eventually get put into + * registers. We avoid allocating them and setting them until + * they're actually needed; then we avoid setting them more than + * once, and release them at the end of code generation. + */ + boolean one_reg_set = false; + int one_reg; +#define SET_ONE_REG_IF_UNSET(f) if (!one_reg_set) {\ + one_reg = spe_allocate_available_register(f); \ + spe_load_float(f, one_reg, 1.0f); \ + one_reg_set = true; \ +} +#define RELEASE_ONE_REG_IF_USED(f) if (one_reg_set) {\ + spe_release_register(f, one_reg); \ +} + + boolean const_color_set = false; + int constR_reg, constG_reg, constB_reg; +#define SET_CONST_COLOR_IF_UNSET(f, blend_color) if (!const_color_set) {\ + constR_reg = spe_allocate_available_register(f); \ + constG_reg = spe_allocate_available_register(f); \ + constG_reg = spe_allocate_available_register(f); \ + spe_load_float(f, constR_reg, blend_color->color[0]); \ + spe_load_float(f, constG_reg, blend_color->color[1]); \ + spe_load_float(f, constB_reg, blend_color->color[2]); \ + const_color_set = true;\ +} +#define RELEASE_CONST_COLOR_IF_USED(f) if (const_color_set) {\ + spe_release_register(f, constR_reg); \ + spe_release_register(f, constG_reg); \ + spe_release_register(f, constB_reg); \ +} + + boolean const_alpha_set = false; + int constA_reg; +#define SET_CONST_ALPHA_IF_UNSET(f, blend_color) if (!const_alpha_set) {\ + constA_reg = spe_allocate_available_register(f); \ + spe_load_float(f, constA_reg, blend_color->color[3]); \ + const_alpha_set = true; \ +} +#define RELEASE_CONST_ALPHA_IF_USED(f) if (const_alpha_set) {\ + spe_release_register(f, constA_reg); \ +} + + /* Real code starts here */ ASSERT(blend->blend_enable); @@ -348,30 +421,161 @@ gen_blend(const struct pipe_blend_state *blend, /* - * Compute Src RGB terms + * Compute Src RGB terms. We're actually looking for the value + * of (the appropriate RGB factors) * (the incoming source RGB color). */ switch (blend->rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (R,G,B) */ spe_move(f, term1R_reg, fragR_reg); spe_move(f, term1G_reg, fragG_reg); spe_move(f, term1B_reg, fragB_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term1R_reg); - spe_zero(f, term1G_reg); - spe_zero(f, term1B_reg); + /* factors = (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term1R_reg, 0.0f); + spe_load_float(f, term1G_reg, 0.0f); + spe_load_float(f, term1B_reg, 0.0f); break; case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ spe_fm(f, term1R_reg, fragR_reg, fragR_reg); spe_fm(f, term1G_reg, fragG_reg, fragG_reg); spe_fm(f, term1B_reg, fragB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ spe_fm(f, term1R_reg, fragR_reg, fragA_reg); spe_fm(f, term1G_reg, fragG_reg, fragA_reg); spe_fm(f, term1B_reg, fragB_reg, fragA_reg); break; - /* XXX more cases */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - R */ + spe_fs(f, tmp_reg, one_reg, fragR_reg); + /* term = R * tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + /* repeat for G and B */ + spe_fs(f, tmp_reg, one_reg, fragG_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, fragB_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ + spe_fm(f, term1R_reg, fragR_reg, fbR_reg); + spe_fm(f, term1G_reg, fragG_reg, fbG_reg); + spe_fm(f, term1B_reg, fragB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - Rfb */ + spe_fs(f, tmp_reg, one_reg, fbR_reg); + /* term = R * tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + /* repeat for G and B */ + spe_fs(f, tmp_reg, one_reg, fbG_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, fbB_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - A */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* term = R * tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + /* repeat for G and B with the same (1-A) factor */ + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ + spe_fm(f, term1R_reg, fragR_reg, fbA_reg); + spe_fm(f, term1G_reg, fragG_reg, fbA_reg); + spe_fm(f, term1B_reg, fragB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - A */ + spe_fs(f, tmp_reg, one_reg, fbA_reg); + /* term = R * tmp, G*tmp, and B*tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We'll need the optional blend color registers */ + SET_CONST_COLOR_IF_UNSET(f,blend_color) + /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ + spe_fm(f, term1R_reg, fragR_reg, constR_reg); + spe_fm(f, term1G_reg, fragG_reg, constG_reg); + spe_fm(f, term1B_reg, fragB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + SET_CONST_ALPHA_IF_UNSET(f, blend_color) + /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ + spe_fm(f, term1R_reg, fragR_reg, constA_reg); + spe_fm(f, term1G_reg, fragG_reg, constA_reg); + spe_fm(f, term1B_reg, fragB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need both the optional {1,1,1,1} register, and the optional + * constant color registers + */ + SET_ONE_REG_IF_UNSET(f) + SET_CONST_COLOR_IF_UNSET(f, blend_color) + /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) */ + spe_fs(f, tmp_reg, one_reg, constR_reg); + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, constG_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, constB_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional {1,1,1,1} register and the optional + * constant alpha register + */ + SET_ONE_REG_IF_UNSET(f) + SET_CONST_ALPHA_IF_UNSET(f, blend_color) + /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) */ + spe_fs(f, tmp_reg, one_reg, constA_reg); + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* We'll need the optional {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so + * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) + */ + /* tmp = 1 - Afb */ + spe_fs(f, tmp_reg, one_reg, fbA_reg); + /* tmp = min(A,tmp) */ + FLOAT_VECTOR_MIN(f, tmp_reg, fragA_reg, tmp_reg) + /* term = R*tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + + /* non-OpenGL cases? */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: ASSERT(0); } @@ -421,6 +625,7 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* one = {1.0, 1.0, 1.0, 1.0} */ if (!one_reg_set) { + one_reg = spe_allocate_available_register(f); spe_load_float(f, one_reg, 1.0f); one_reg_set = true; } @@ -432,6 +637,14 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term2B_reg, fbB_reg, tmp_reg); break; /* XXX more cases */ + // GL_ONE_MINUS_SRC_COLOR + // GL_DST_COLOR + // GL_ONE_MINUS_DST_COLOR + // GL_DST_ALPHA + // GL_CONSTANT_COLOR + // GL_ONE_MINUS_CONSTANT_COLOR + // GL_CONSTANT_ALPHA + // GL_ONE_MINUS_CONSTANT_ALPHA default: ASSERT(0); } @@ -452,6 +665,7 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* one = {1.0, 1.0, 1.0, 1.0} */ if (!one_reg_set) { + one_reg = spe_allocate_available_register(f); spe_load_float(f, one_reg, 1.0f); one_reg_set = true; } @@ -461,6 +675,14 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term2A_reg, fbA_reg, tmp_reg); break; /* XXX more cases */ + // GL_ONE_MINUS_SRC_COLOR + // GL_DST_COLOR + // GL_ONE_MINUS_DST_COLOR + // GL_DST_ALPHA + // GL_CONSTANT_COLOR + // GL_ONE_MINUS_CONSTANT_COLOR + // GL_CONSTANT_ALPHA + // GL_ONE_MINUS_CONSTANT_ALPHA default: ASSERT(0); } @@ -479,7 +701,21 @@ gen_blend(const struct pipe_blend_state *blend, spe_fs(f, fragG_reg, term1G_reg, term2G_reg); spe_fs(f, fragB_reg, term1B_reg, term2B_reg); break; - /* XXX more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragR_reg, term2R_reg, term1R_reg); + spe_fs(f, fragG_reg, term2G_reg, term1G_reg); + spe_fs(f, fragB_reg, term2B_reg, term1B_reg); + break; + case PIPE_BLEND_MIN: + FLOAT_VECTOR_MIN(f, fragR_reg, term1R_reg, term2R_reg) + FLOAT_VECTOR_MIN(f, fragG_reg, term1G_reg, term2G_reg) + FLOAT_VECTOR_MIN(f, fragB_reg, term1B_reg, term2B_reg) + break; + case PIPE_BLEND_MAX: + FLOAT_VECTOR_MAX(f, fragR_reg, term1R_reg, term2R_reg) + FLOAT_VECTOR_MAX(f, fragG_reg, term1G_reg, term2G_reg) + FLOAT_VECTOR_MAX(f, fragB_reg, term1B_reg, term2B_reg) + break; default: ASSERT(0); } @@ -494,7 +730,15 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLEND_SUBTRACT: spe_fs(f, fragA_reg, term1A_reg, term2A_reg); break; - /* XXX more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragA_reg, term2A_reg, term1A_reg); + break; + case PIPE_BLEND_MIN: + FLOAT_VECTOR_MIN(f, fragA_reg, term1A_reg, term2A_reg) + break; + case PIPE_BLEND_MAX: + FLOAT_VECTOR_MAX(f, fragA_reg, term1A_reg, term2A_reg) + break; default: ASSERT(0); } @@ -514,8 +758,12 @@ gen_blend(const struct pipe_blend_state *blend, spe_release_register(f, fbB_reg); spe_release_register(f, fbA_reg); - spe_release_register(f, one_reg); spe_release_register(f, tmp_reg); + + /* Free any optional registers that actually got used */ + RELEASE_ONE_REG_IF_USED(f) + RELEASE_CONST_COLOR_IF_USED(f) + RELEASE_CONST_ALPHA_IF_USED(f) } @@ -629,6 +877,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) const struct pipe_depth_stencil_alpha_state *dsa = &cell->depth_stencil->base; const struct pipe_blend_state *blend = &cell->blend->base; + const struct pipe_blend_color *blend_color = &cell->blend_color; const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ @@ -651,7 +900,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ - spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); spe_allocate_register(f, x_reg); spe_allocate_register(f, y_reg); spe_allocate_register(f, color_tile_reg); @@ -816,7 +1064,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) if (blend->blend_enable) { - gen_blend(blend, f, color_format, + gen_blend(blend, blend_color, f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); } |