diff options
Diffstat (limited to 'src/gallium/drivers/cell/spu/spu_tri.c')
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_tri.c | 179 |
1 files changed, 87 insertions, 92 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 0d9fcb9997..d727268475 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -29,7 +29,6 @@ * Triangle rendering within a tile. */ -#include <transpose_matrix4x4.h> #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "util/u_math.h" @@ -71,6 +70,12 @@ struct vertex_header { #define MASK_ALL 0xf +#define CHAN0 0 +#define CHAN1 1 +#define CHAN2 2 +#define CHAN3 3 + + #define DEBUG_VERTS 0 /** @@ -144,99 +149,97 @@ struct setup_stage { static struct setup_stage setup; -/** - * Evaluate attribute coefficients (plane equations) to compute - * attribute values for the four fragments in a quad. - * Eg: four colors will be computed (in AoS format). - */ -static INLINE void -eval_coeff(uint slot, float x, float y, vector float w, vector float result[4]) +static INLINE vector float +splatx(vector float v) { - switch (spu.vertex_info.attrib[slot].interp_mode) { - case INTERP_CONSTANT: - result[QUAD_TOP_LEFT] = - result[QUAD_TOP_RIGHT] = - result[QUAD_BOTTOM_LEFT] = - result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0; - break; - case INTERP_LINEAR: - { - vector float dadx = setup.coef[slot].dadx; - vector float dady = setup.coef[slot].dady; - vector float topLeft = - spu_add(setup.coef[slot].a0, - spu_add(spu_mul(spu_splats(x), dadx), - spu_mul(spu_splats(y), dady))); - - result[QUAD_TOP_LEFT] = topLeft; - result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); - result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); - result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); - } - break; - case INTERP_PERSPECTIVE: - { - vector float dadx = setup.coef[slot].dadx; - vector float dady = setup.coef[slot].dady; - vector float topLeft = - spu_add(setup.coef[slot].a0, - spu_add(spu_mul(spu_splats(x), dadx), - spu_mul(spu_splats(y), dady))); - - vector float wInv = spu_re(w); /* 1.0 / w */ - - result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv); - result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv); - result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv); - result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv); - } - break; - case INTERP_POS: - case INTERP_NONE: - break; - default: - ASSERT(0); - } + return spu_splats(spu_extract(v, CHAN0)); } - -/** - * As above, but return 4 vectors in SOA format. - * XXX this will all be re-written someday. - */ -static INLINE void -eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4]) +static INLINE vector float +splaty(vector float v) { - eval_coeff(slot, x, y, w, result); - _transpose_matrix4x4(result, result); + return spu_splats(spu_extract(v, CHAN1)); } +static INLINE vector float +splatz(vector float v) +{ + return spu_splats(spu_extract(v, CHAN2)); +} -/** Evalute coefficients to get Z for four pixels in a quad */ static INLINE vector float -eval_z(float x, float y) +splatw(vector float v) { - const uint slot = 0; - const float dzdx = spu_extract(setup.coef[slot].dadx, 2); - const float dzdy = spu_extract(setup.coef[slot].dady, 2); - const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy; - const vector float topLeftv = spu_splats(topLeft); - const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; - return spu_add(topLeftv, derivs); + return spu_splats(spu_extract(v, CHAN3)); } -/** Evalute coefficients to get W for four pixels in a quad */ -static INLINE vector float -eval_w(float x, float y) +/** + * Setup fragment shader inputs by evaluating triangle's vertex + * attribute coefficient info. + * \param x quad x pos + * \param y quad y pos + * \param fragZ returns quad Z values + * \param fragInputs returns fragment program inputs + * Note: this code could be incorporated into the fragment program + * itself to avoid the loop and switch. + */ +static void +eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[]) { - const uint slot = 0; - const float dwdx = spu_extract(setup.coef[slot].dadx, 3); - const float dwdy = spu_extract(setup.coef[slot].dady, 3); - const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy; - const vector float topLeftv = spu_splats(topLeft); - const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy }; - return spu_add(topLeftv, derivs); + static const vector float deltaX = (const vector float) {0, 1, 0, 1}; + static const vector float deltaY = (const vector float) {0, 0, 1, 1}; + + const uint posSlot = 0; + const vector float pos = setup.coef[posSlot].a0; + const vector float dposdx = setup.coef[posSlot].dadx; + const vector float dposdy = setup.coef[posSlot].dady; + const vector float fragX = spu_splats(x) + deltaX; + const vector float fragY = spu_splats(y) + deltaY; + vector float fragW, wInv; + uint i; + + *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy); + fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy); + wInv = spu_re(fragW); /* 1 / w */ + + /* loop over fragment program inputs */ + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + uint attr = i + 1; + enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode; + + /* constant term */ + vector float a0 = setup.coef[attr].a0; + vector float r0 = splatx(a0); + vector float r1 = splaty(a0); + vector float r2 = splatz(a0); + vector float r3 = splatw(a0); + + if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) { + /* linear term */ + vector float dadx = setup.coef[attr].dadx; + vector float dady = setup.coef[attr].dady; + /* Use SPU intrinsics here to get slightly better code. + * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady); + */ + r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0)); + r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1)); + r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2)); + r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3)); + if (interp == INTERP_PERSPECTIVE) { + /* perspective term */ + r0 *= wInv; + r1 *= wInv; + r2 *= wInv; + r3 *= wInv; + } + } + fragInputs[CHAN0] = r0; + fragInputs[CHAN1] = r1; + fragInputs[CHAN2] = r2; + fragInputs[CHAN3] = r3; + fragInputs += 4; + } } @@ -262,19 +265,11 @@ emit_quad( int x, int y, mask_t mask) * Run fragment shader, execute per-fragment ops, update fb/tile. */ vector float inputs[4*4], outputs[2*4]; - vector float fragZ = eval_z((float) x, (float) y); - vector float fragW = eval_w((float) x, (float) y); vector unsigned int kill_mask; + vector float fragZ; + + eval_inputs((float) x, (float) y, &fragZ, inputs); - /* setup inputs */ -#if 0 - eval_coeff_soa(1, (float) x, (float) y, fragW, inputs); -#else - uint i; - for (i = 0; i < spu.vertex_info.num_attribs; i++) { - eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4); - } -#endif ASSERT(spu.fragment_program); ASSERT(spu.fragment_ops); |