/* * (C) Copyright IBM Corporation 2008 * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file * Generate code to perform all per-fragment operations. * * Code generated by these functions performs alpha, depth, and stencil * testing as well as alpha blending. * * \note * Occlusion query is not supported, but this is the right place to add that * support. * * \author Ian Romanick */ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "cell_context.h" #include "rtasm/rtasm_ppc_spe.h" /** * Generate code to perform alpha testing. * * The code generated by this function uses the register specified by * \c mask as both an input and an output. 
* * \param dsa Current alpha-test state * \param f Function to which code should be appended * \param mask Index of register containing active fragment mask * \param alphas Index of register containing per-fragment alpha values * * \note Emits a maximum of 6 instructions. */ static void emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, struct spe_function *f, int mask, int alphas) { /* If the alpha function is either NEVER or ALWAYS, there is no need to * load the reference value into a register. ALWAYS is a fairly common * case, and this optimization saves 2 instructions. */ if (dsa->alpha.enabled && (dsa->alpha.func != PIPE_FUNC_NEVER) && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { int ref = spe_allocate_available_register(f); int tmp_a = spe_allocate_available_register(f); int tmp_b = spe_allocate_available_register(f); union { float f; unsigned u; } ref_val; boolean complement = FALSE; ref_val.f = dsa->alpha.ref; spe_il(f, ref, ref_val.u & 0x0000ffff); spe_ilh(f, ref, ref_val.u >> 16); switch (dsa->alpha.func) { case PIPE_FUNC_NOTEQUAL: complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_EQUAL: spe_fceq(f, tmp_a, ref, alphas); break; case PIPE_FUNC_LEQUAL: complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GREATER: spe_fcgt(f, tmp_a, ref, alphas); break; case PIPE_FUNC_LESS: complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GEQUAL: spe_fcgt(f, tmp_a, ref, alphas); spe_fceq(f, tmp_b, ref, alphas); spe_or(f, tmp_a, tmp_b, tmp_a); break; case PIPE_FUNC_ALWAYS: case PIPE_FUNC_NEVER: default: assert(0); break; } if (complement) { spe_andc(f, mask, mask, tmp_a); } else { spe_and(f, mask, mask, tmp_a); } spe_release_register(f, ref); spe_release_register(f, tmp_a); spe_release_register(f, tmp_b); } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { spe_il(f, mask, 0); } } /** * \param dsa Current depth-test state * \param f Function to which code should be appended * \param m Mask of allocated / free SPE registers * \param mask Index of 
register to contain depth-pass mask * \param stored Index of register containing values from depth buffer * \param calculated Index of register containing per-fragment depth values * * \return * If the calculated depth comparison mask is the actual mask, \c FALSE is * returned. If the calculated depth comparison mask is the compliment of * the actual mask, \c TRUE is returned. * * \note Emits a maximum of 3 instructions. */ static boolean emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, struct spe_function *f, int mask, int stored, int calculated) { unsigned func = (dsa->depth.enabled) ? dsa->depth.func : PIPE_FUNC_ALWAYS; int tmp = spe_allocate_available_register(f); boolean compliment = FALSE; switch (func) { case PIPE_FUNC_NEVER: spe_il(f, mask, 0); break; case PIPE_FUNC_NOTEQUAL: compliment = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_EQUAL: spe_ceq(f, mask, calculated, stored); break; case PIPE_FUNC_LEQUAL: compliment = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GREATER: spe_clgt(f, mask, calculated, stored); break; case PIPE_FUNC_LESS: compliment = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GEQUAL: spe_clgt(f, mask, calculated, stored); spe_ceq(f, tmp, calculated, stored); spe_or(f, mask, mask, tmp); break; case PIPE_FUNC_ALWAYS: spe_il(f, mask, ~0); break; default: assert(0); break; } spe_release_register(f, tmp); return compliment; } /** * \note Emits a maximum of 5 instructions. 
*/ static void emit_stencil_op(struct spe_function *f, int out, int in, int mask, unsigned op, unsigned ref) { const int clamp = spe_allocate_available_register(f); const int tmp = spe_allocate_available_register(f); switch(op) { case PIPE_STENCIL_OP_KEEP: assert(0); case PIPE_STENCIL_OP_ZERO: spe_il(f, out, 0); break; case PIPE_STENCIL_OP_REPLACE: spe_il(f, out, ref); break; case PIPE_STENCIL_OP_INCR: spe_il(f, clamp, 0x0ff); spe_ai(f, out, in, 1); spe_cgti(f, tmp, out, clamp); spe_selb(f, out, out, clamp, tmp); break; case PIPE_STENCIL_OP_DECR: spe_il(f, clamp, 0); spe_ai(f, out, in, -1); spe_cgti(f, tmp, out, clamp); spe_selb(f, out, clamp, out, tmp); break; case PIPE_STENCIL_OP_INCR_WRAP: spe_ai(f, out, in, 1); break; case PIPE_STENCIL_OP_DECR_WRAP: spe_ai(f, out, in, -1); break; case PIPE_STENCIL_OP_INVERT: spe_nor(f, out, in, in); break; default: assert(0); } spe_release_register(f, tmp); spe_release_register(f, clamp); spe_selb(f, out, in, out, mask); } /** * \param dsa Depth / stencil test state * \param face 0 for front face, 1 for back face * \param f Function to append instructions to * \param reg_mask Mask of allocated registers * \param mask Register containing mask of fragments passing the * alpha test * \param depth_mask Register containing mask of fragments passing the * depth test * \param depth_compliment Is \c depth_mask the compliment of the actual mask? * \param stencil Register containing values from stencil buffer * \param depth_pass Register to store mask of fragments passing stencil test * and depth test * * \note * Emits a maximum of 10 + (3 * 5) = 25 instructions. 
*/ static int emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, unsigned face, struct spe_function *f, int mask, int depth_mask, boolean depth_complement, int stencil, int depth_pass) { int stencil_fail = spe_allocate_available_register(f); int depth_fail = spe_allocate_available_register(f); int stencil_mask = spe_allocate_available_register(f); int stencil_pass = spe_allocate_available_register(f); int face_stencil = spe_allocate_available_register(f); int stencil_src = stencil; const unsigned ref = (dsa->stencil[face].ref_value & dsa->stencil[face].value_mask); boolean complement = FALSE; int stored = spe_allocate_available_register(f); int tmp = spe_allocate_available_register(f); if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) && (dsa->stencil[face].value_mask != 0x0ff)) { spe_andi(f, stored, stencil, dsa->stencil[face].value_mask); } switch (dsa->stencil[face].func) { case PIPE_FUNC_NEVER: spe_il(f, stencil_mask, 0); break; case PIPE_FUNC_NOTEQUAL: complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_EQUAL: spe_ceqi(f, stencil_mask, stored, ref); break; case PIPE_FUNC_LEQUAL: complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GREATER: spe_clgti(f, stencil_mask, stored, ref); break; case PIPE_FUNC_LESS: complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GEQUAL: spe_clgti(f, stencil_mask, stored, ref); spe_ceqi(f, tmp, stored, ref); spe_or(f, stencil_mask, stencil_mask, tmp); break; case PIPE_FUNC_ALWAYS: /* See comment below. */ break; default: assert(0); break; } spe_release_register(f, stored); spe_release_register(f, tmp); /* ALWAYS is a very common stencil-test, so some effort is applied to * optimize that case. The stencil-pass mask is the same as the input * fragment mask. This makes the stencil-test (above) a no-op, and the * input fragment mask can be "renamed" the stencil-pass mask. 
*/ if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { spe_release_register(f, stencil_pass); stencil_pass = mask; } else { if (complement) { spe_andc(f, stencil_pass, mask, stencil_mask); } else { spe_and(f, stencil_pass, mask, stencil_mask); } } if (depth_complement) { spe_andc(f, depth_pass, stencil_pass, depth_mask); } else { spe_and(f, depth_pass, stencil_pass, depth_mask); } /* Conditionally emit code to update the stencil value under various * condititons. Note that there is no need to generate code under the * following circumstances: * * - Stencil write mask is zero. * - For stencil-fail if the stencil test is ALWAYS * - For depth-fail if the stencil test is NEVER * - For depth-pass if the stencil test is NEVER * - Any of the 3 conditions if the operation is KEEP */ if (dsa->stencil[face].write_mask != 0) { if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { if (complement) { spe_and(f, stencil_fail, mask, stencil_mask); } else { spe_andc(f, stencil_fail, mask, stencil_mask); } emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, dsa->stencil[face].fail_op, dsa->stencil[face].ref_value); stencil_src = face_stencil; } if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { if (depth_complement) { spe_and(f, depth_fail, stencil_pass, depth_mask); } else { spe_andc(f, depth_fail, stencil_pass, depth_mask); } emit_stencil_op(f, face_stencil, stencil_src, depth_fail, dsa->stencil[face].zfail_op, dsa->stencil[face].ref_value); stencil_src = face_stencil; } if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { emit_stencil_op(f, face_stencil, stencil_src, depth_pass, dsa->stencil[face].zpass_op, dsa->stencil[face].ref_value); stencil_src = face_stencil; } } spe_release_register(f, stencil_fail); spe_release_register(f, depth_fail); spe_release_register(f, stencil_mask); if (stencil_pass != mask) 
{ spe_release_register(f, stencil_pass); } /* If all of the stencil operations were KEEP or the stencil write mask was * zero, "stencil_src" will still be set to "stencil". In this case * release the "face_stencil" register. Otherwise apply the stencil write * mask to select bits from the calculated stencil value and the previous * stencil value. */ if (stencil_src == stencil) { spe_release_register(f, face_stencil); } else if (dsa->stencil[face].write_mask != 0x0ff) { int tmp = spe_allocate_available_register(f); spe_il(f, tmp, dsa->stencil[face].write_mask); spe_selb(f, stencil_src, stencil, stencil_src, tmp); spe_release_register(f, tmp); } return stencil_src; } void cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) { struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; struct spe_function *const f = &cdsa->code; /* This code generates a maximum of 6 (alpha test) + 3 (depth test) * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round * up to 64 to make it a happy power-of-two. */ spe_init_func(f, 4 * 64); /* Allocate registers for the function's input parameters. Cleverly (and * clever code is usually dangerous, but I couldn't resist) the generated * function returns a structure. Returned structures start with register * 3, and the structure fields are ordered to match up exactly with the * input parameters. 
*/ int mask = spe_allocate_register(f, 3); int depth = spe_allocate_register(f, 4); int stencil = spe_allocate_register(f, 5); int zvals = spe_allocate_register(f, 6); int frag_a = spe_allocate_register(f, 7); int facing = spe_allocate_register(f, 8); int depth_mask = spe_allocate_available_register(f); boolean depth_complement; emit_alpha_test(dsa, f, mask, frag_a); depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); if (dsa->stencil[0].enabled) { const int front_depth_pass = spe_allocate_available_register(f); int front_stencil = emit_stencil_test(dsa, 0, f, mask, depth_mask, depth_complement, stencil, front_depth_pass); if (dsa->stencil[1].enabled) { const int back_depth_pass = spe_allocate_available_register(f); int back_stencil = emit_stencil_test(dsa, 1, f, mask, depth_mask, depth_complement, stencil, back_depth_pass); /* If the front facing stencil value and the back facing stencil * value are stored in the same register, there is no need to select * a value based on the facing. This can happen if the stencil value * was not modified due to the write masks being zero, the stencil * operations being KEEP, etc. 
*/ if (front_stencil != back_stencil) { spe_selb(f, stencil, back_stencil, front_stencil, facing); } if (back_stencil != stencil) { spe_release_register(f, back_stencil); } if (front_stencil != stencil) { spe_release_register(f, front_stencil); } spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); spe_release_register(f, back_depth_pass); } else { if (front_stencil != stencil) { spe_or(f, stencil, front_stencil, front_stencil); spe_release_register(f, front_stencil); } } spe_release_register(f, front_depth_pass); } else if (dsa->depth.enabled) { if (depth_complement) { spe_andc(f, mask, mask, depth_mask); } else { spe_and(f, mask, mask, depth_mask); } } if (dsa->depth.writemask) { spe_selb(f, depth, depth, zvals, mask); } spe_bi(f, 0, 0, 0); #if 0 { const uint32_t *p = f->store; unsigned i; printf("# alpha (%sabled)\n", (dsa->alpha.enabled) ? "en" : "dis"); printf("# func: %u\n", dsa->alpha.func); printf("# ref: %.2f\n", dsa->alpha.ref); printf("# depth (%sabled)\n", (dsa->depth.enabled) ? "en" : "dis"); printf("# func: %u\n", dsa->depth.func); for (i = 0; i < 2; i++) { printf("# %s stencil (%sabled)\n", (i == 0) ? "front" : "back", (dsa->stencil[i].enabled) ? 
"en" : "dis"); printf("# func: %u\n", dsa->stencil[i].func); printf("# op (sf, zf, zp): %u %u %u\n", dsa->stencil[i].fail_op, dsa->stencil[i].zfail_op, dsa->stencil[i].zpass_op); printf("# ref value / value mask / write mask: %02x %02x %02x\n", dsa->stencil[i].ref_value, dsa->stencil[i].value_mask, dsa->stencil[i].write_mask); } printf("\t.text\n"); for (/* empty */; p < f->csr; p++) { printf("\t.long\t0x%04x\n", *p); } fflush(stdout); } #endif } /** * \note Emits a maximum of 3 instructions */ static int emit_alpha_factor_calculation(struct spe_function *f, unsigned factor, float const_alpha, int src_alpha, int dst_alpha) { union { float f; unsigned u; } alpha; int factor_reg; int tmp; alpha.f = const_alpha; switch (factor) { case PIPE_BLENDFACTOR_ONE: factor_reg = -1; break; case PIPE_BLENDFACTOR_SRC_ALPHA: factor_reg = spe_allocate_available_register(f); spe_or(f, factor_reg, src_alpha, src_alpha); break; case PIPE_BLENDFACTOR_DST_ALPHA: factor_reg = dst_alpha; break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: factor_reg = -1; break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: const_alpha = 1.0 - const_alpha; /* FALLTHROUGH */ case PIPE_BLENDFACTOR_CONST_ALPHA: factor_reg = spe_allocate_available_register(f); spe_il(f, factor_reg, alpha.u & 0x0ffff); spe_ilh(f, factor_reg, alpha.u >> 16); break; case PIPE_BLENDFACTOR_ZERO: factor_reg = -1; break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: tmp = spe_allocate_available_register(f); factor_reg = spe_allocate_available_register(f); spe_il(f, tmp, 1); spe_cuflt(f, tmp, tmp, 0); spe_fs(f, factor_reg, tmp, src_alpha); spe_release_register(f, tmp); break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: tmp = spe_allocate_available_register(f); factor_reg = spe_allocate_available_register(f); spe_il(f, tmp, 1); spe_cuflt(f, tmp, tmp, 0); spe_fs(f, factor_reg, tmp, dst_alpha); spe_release_register(f, tmp); break; case PIPE_BLENDFACTOR_SRC1_ALPHA: case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: default: assert(0); factor_reg = -1; break; } return 
factor_reg; } /** * \note Emits a maximum of 5 instructions */ static void emit_color_factor_calculation(struct spe_function *f, unsigned sF, unsigned mask, const struct pipe_blend_color *blend_color, const int *src, const int *dst, int *factor) { union { float f[4]; unsigned u[4]; } color; int tmp; unsigned i; color.f[0] = blend_color->color[0]; color.f[1] = blend_color->color[1]; color.f[2] = blend_color->color[2]; color.f[3] = blend_color->color[3]; factor[0] = -1; factor[1] = -1; factor[2] = -1; factor[3] = -1; switch (sF) { case PIPE_BLENDFACTOR_ONE: break; case PIPE_BLENDFACTOR_SRC_COLOR: for (i = 0; i < 3; ++i) { if ((mask & (1U << i)) != 0) { factor[i] = spe_allocate_available_register(f); spe_or(f, factor[i], src[i], src[i]); } } break; case PIPE_BLENDFACTOR_SRC_ALPHA: factor[0] = spe_allocate_available_register(f); factor[1] = factor[0]; factor[2] = factor[0]; spe_or(f, factor[0], src[3], src[3]); break; case PIPE_BLENDFACTOR_DST_ALPHA: factor[0] = dst[3]; factor[1] = dst[3]; factor[2] = dst[3]; break; case PIPE_BLENDFACTOR_DST_COLOR: factor[0] = dst[0]; factor[1] = dst[1]; factor[2] = dst[2]; break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: tmp = spe_allocate_available_register(f); factor[0] = spe_allocate_available_register(f); factor[1] = factor[0]; factor[2] = factor[0]; /* Alpha saturate means min(As, 1-Ad). 
*/ spe_il(f, tmp, 1); spe_cuflt(f, tmp, tmp, 0); spe_fs(f, tmp, tmp, dst[3]); spe_fcgt(f, factor[0], tmp, src[3]); spe_selb(f, factor[0], src[3], tmp, factor[0]); spe_release_register(f, tmp); break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: color.f[0] = 1.0 - color.f[0]; color.f[1] = 1.0 - color.f[1]; color.f[2] = 1.0 - color.f[2]; /* FALLTHROUGH */ case PIPE_BLENDFACTOR_CONST_COLOR: for (i = 0; i < 3; i++) { factor[i] = spe_allocate_available_register(f); spe_il(f, factor[i], color.u[i] & 0x0ffff); spe_ilh(f, factor[i], color.u[i] >> 16); } break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: color.f[3] = 1.0 - color.f[3]; /* FALLTHROUGH */ case PIPE_BLENDFACTOR_CONST_ALPHA: factor[0] = spe_allocate_available_register(f); factor[1] = factor[0]; factor[2] = factor[0]; spe_il(f, factor[0], color.u[3] & 0x0ffff); spe_ilh(f, factor[0], color.u[3] >> 16); break; case PIPE_BLENDFACTOR_ZERO: break; case PIPE_BLENDFACTOR_INV_SRC_COLOR: tmp = spe_allocate_available_register(f); spe_il(f, tmp, 1); spe_cuflt(f, tmp, tmp, 0); for (i = 0; i < 3; ++i) { if ((mask & (1U << i)) != 0) { factor[i] = spe_allocate_available_register(f); spe_fs(f, factor[i], tmp, src[i]); } } spe_release_register(f, tmp); break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: tmp = spe_allocate_available_register(f); factor[0] = spe_allocate_available_register(f); factor[1] = factor[0]; factor[2] = factor[0]; spe_il(f, tmp, 1); spe_cuflt(f, tmp, tmp, 0); spe_fs(f, factor[0], tmp, src[3]); spe_release_register(f, tmp); break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: tmp = spe_allocate_available_register(f); factor[0] = spe_allocate_available_register(f); factor[1] = factor[0]; factor[2] = factor[0]; spe_il(f, tmp, 1); spe_cuflt(f, tmp, tmp, 0); spe_fs(f, factor[0], tmp, dst[3]); spe_release_register(f, tmp); break; case PIPE_BLENDFACTOR_INV_DST_COLOR: tmp = spe_allocate_available_register(f); spe_il(f, tmp, 1); spe_cuflt(f, tmp, tmp, 0); for (i = 0; i < 3; ++i) { if ((mask & (1U << i)) != 0) { factor[i] = 
spe_allocate_available_register(f); spe_fs(f, factor[i], tmp, dst[i]); } } spe_release_register(f, tmp); break; case PIPE_BLENDFACTOR_SRC1_COLOR: case PIPE_BLENDFACTOR_SRC1_ALPHA: case PIPE_BLENDFACTOR_INV_SRC1_COLOR: case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: default: assert(0); } } static void emit_blend_calculation(struct spe_function *f, unsigned func, unsigned sF, unsigned dF, int src, int src_factor, int dst, int dst_factor) { int tmp = spe_allocate_available_register(f); switch (func) { case PIPE_BLEND_ADD: if (sF == PIPE_BLENDFACTOR_ONE) { if (dF == PIPE_BLENDFACTOR_ZERO) { /* Do nothing. */ } else if (dF == PIPE_BLENDFACTOR_ONE) { spe_fa(f, src, src, dst); } } else if (sF == PIPE_BLENDFACTOR_ZERO) { if (dF == PIPE_BLENDFACTOR_ZERO) { spe_il(f, src, 0); } else if (dF == PIPE_BLENDFACTOR_ONE) { spe_or(f, src, dst, dst); } } else { spe_fm(f, tmp, dst, dst_factor); spe_fma(f, src, src, src_factor, tmp); } break; case PIPE_BLEND_SUBTRACT: if (sF == PIPE_BLENDFACTOR_ONE) { if (dF == PIPE_BLENDFACTOR_ZERO) { /* Do nothing. 
*/ } else if (dF == PIPE_BLENDFACTOR_ONE) { spe_fs(f, src, src, dst); } } else if (sF == PIPE_BLENDFACTOR_ZERO) { if (dF == PIPE_BLENDFACTOR_ZERO) { spe_il(f, src, 0); } else if (dF == PIPE_BLENDFACTOR_ONE) { spe_il(f, tmp, 0); spe_fs(f, src, tmp, dst); } } else { spe_fm(f, tmp, dst, dst_factor); spe_fms(f, src, src, src_factor, tmp); } break; case PIPE_BLEND_REVERSE_SUBTRACT: if (sF == PIPE_BLENDFACTOR_ONE) { if (dF == PIPE_BLENDFACTOR_ZERO) { spe_il(f, tmp, 0); spe_fs(f, src, tmp, src); } else if (dF == PIPE_BLENDFACTOR_ONE) { spe_fs(f, src, dst, src); } } else if (sF == PIPE_BLENDFACTOR_ZERO) { if (dF == PIPE_BLENDFACTOR_ZERO) { spe_il(f, src, 0); } else if (dF == PIPE_BLENDFACTOR_ONE) { spe_or(f, src, dst, dst); } } else { spe_fm(f, tmp, src, src_factor); spe_fms(f, src, src, dst_factor, tmp); } break; case PIPE_BLEND_MIN: spe_cgt(f, tmp, src, dst); spe_selb(f, src, dst, src, tmp); break; case PIPE_BLEND_MAX: spe_cgt(f, tmp, src, dst); spe_selb(f, src, src, dst, tmp); break; default: assert(0); } spe_release_register(f, tmp); } /** * Generate code to perform alpha blending on the SPE */ void cell_generate_alpha_blend(struct cell_blend_state *cb, const struct pipe_blend_color *blend_color) { struct pipe_blend_state *const b = &cb->base; struct spe_function *const f = &cb->code; /* This code generates a maximum of 3 (source alpha factor) * + 3 (destination alpha factor) + (3 * 5) (source color factor) * + (3 * 5) (destination color factor) + (4 * 2) (blend equation) * + 4 (fragment mask) + 1 (return) = 49 instlructions. Round up to 64 to * make it a happy power-of-two. 
*/ spe_init_func(f, 4 * 64); const int frag[4] = { spe_allocate_register(f, 3), spe_allocate_register(f, 4), spe_allocate_register(f, 5), spe_allocate_register(f, 6), }; const int pixel[4] = { spe_allocate_register(f, 7), spe_allocate_register(f, 8), spe_allocate_register(f, 9), spe_allocate_register(f, 10), }; const int mask = spe_allocate_register(f, 11); unsigned func[4]; unsigned sF[4]; unsigned dF[4]; unsigned i; int src_factor[4]; int dst_factor[4]; /* Does the selected blend mode make use of the source / destination * color (RGB) blend factors? */ boolean need_color_factor = b->blend_enable && (b->rgb_func != PIPE_BLEND_MIN) && (b->rgb_func != PIPE_BLEND_MAX); /* Does the selected blend mode make use of the source / destination * alpha blend factors? */ boolean need_alpha_factor = b->blend_enable && (b->alpha_func != PIPE_BLEND_MIN) && (b->alpha_func != PIPE_BLEND_MAX); sF[0] = b->rgb_src_factor; sF[1] = sF[0]; sF[2] = sF[0]; sF[3] = (b->alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) ? PIPE_BLENDFACTOR_ONE : b->alpha_src_factor; dF[0] = b->rgb_dst_factor; dF[1] = dF[0]; dF[2] = dF[0]; dF[3] = b->rgb_dst_factor; /* If alpha writing is enabled and the alpha blend mode requires use of * the alpha factor, calculate the alpha factor. */ if (((b->colormask & 8) != 0) && need_alpha_factor) { src_factor[3] = emit_alpha_factor_calculation(f, sF[3], blend_color->color[3], frag[3], pixel[3]); /* If the alpha destination blend factor is the same as the alpha source * blend factor, re-use the previously calculated value. */ dst_factor[3] = (dF[3] == sF[3]) ? 
src_factor[3] : emit_alpha_factor_calculation(f, dF[3], blend_color->color[3], frag[3], pixel[3]); } if (sF[0] == sF[3]) { src_factor[0] = src_factor[3]; src_factor[1] = src_factor[3]; src_factor[2] = src_factor[3]; } else if (sF[0] == dF[3]) { src_factor[0] = dst_factor[3]; src_factor[1] = dst_factor[3]; src_factor[2] = dst_factor[3]; } else if (need_color_factor) { emit_color_factor_calculation(f, b->rgb_src_factor, b->colormask, blend_color, frag, pixel, src_factor); } if (dF[0] == sF[3]) { dst_factor[0] = src_factor[3]; dst_factor[1] = src_factor[3]; dst_factor[2] = src_factor[3]; } else if (dF[0] == dF[3]) { dst_factor[0] = dst_factor[3]; dst_factor[1] = dst_factor[3]; dst_factor[2] = dst_factor[3]; } else if (dF[0] == sF[0]) { dst_factor[0] = src_factor[0]; dst_factor[1] = src_factor[1]; dst_factor[2] = src_factor[2]; } else if (need_color_factor) { emit_color_factor_calculation(f, b->rgb_dst_factor, b->colormask, blend_color, frag, pixel, dst_factor); } func[0] = b->rgb_func; func[1] = func[0]; func[2] = func[0]; func[3] = b->alpha_func; for (i = 0; i < 4; ++i) { if ((b->colormask & (1U << i)) != 0) { emit_blend_calculation(f, func[i], sF[i], dF[i], frag[i], src_factor[i], pixel[i], dst_factor[i]); spe_selb(f, frag[i], pixel[i], frag[i], mask); } else { spe_or(f, frag[i], pixel[i], pixel[i]); } } spe_bi(f, 0, 0, 0); }