From dee18a147d3adaf2578d27837c8f18c92d796c9d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 13 Oct 2008 19:41:26 -0600 Subject: cell: finish-up perspective-corrected interpolation --- src/gallium/drivers/cell/spu/spu_tri.c | 127 +++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 45 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 022d21ba8f..3f1fb4f7c9 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -213,7 +213,7 @@ clip_emit_quad(struct setup_stage *setup) * Eg: four colors will be computed (in AoS format). */ static INLINE void -eval_coeff(uint slot, float x, float y, vector float result[4]) +eval_coeff(uint slot, float x, float y, vector float w, vector float result[4]) { switch (spu.vertex_info.attrib[slot].interp_mode) { case INTERP_CONSTANT: @@ -222,23 +222,43 @@ eval_coeff(uint slot, float x, float y, vector float result[4]) result[QUAD_BOTTOM_LEFT] = result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; break; - case INTERP_LINEAR: - /* fall-through, for now */ - default: { - register vector float dadx = setup.coef[slot].dadx.v; - register vector float dady = setup.coef[slot].dady.v; - register vector float topLeft - = spu_add(setup.coef[slot].a0.v, - spu_add(spu_mul(spu_splats(x), dadx), - spu_mul(spu_splats(y), dady))); + vector float dadx = setup.coef[slot].dadx.v; + vector float dady = setup.coef[slot].dady.v; + vector float topLeft = + spu_add(setup.coef[slot].a0.v, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); result[QUAD_TOP_LEFT] = topLeft; result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); } + break; + case INTERP_PERSPECTIVE: + { + vector float dadx = setup.coef[slot].dadx.v; + vector float dady = setup.coef[slot].dady.v; + vector float topLeft = + spu_add(setup.coef[slot].a0.v, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + vector float wInv = spu_re(w); /* 1.0 / w */ + + result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv); + result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv); + result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv); + result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv); + } + break; + case INTERP_POS: + case INTERP_NONE: + break; + default: + ASSERT(0); } } @@ -248,14 +268,14 @@ eval_coeff(uint slot, float x, float y, vector float result[4]) * XXX this will all be re-written someday. */ static INLINE void -eval_coeff_soa(uint slot, float x, float y, vector float result[4]) +eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4]) { - eval_coeff(slot, x, y, result); + eval_coeff(slot, x, y, w, result); _transpose_matrix4x4(result, result); } - +/** Evalute coefficients to get Z for four pixels in a quad */ static INLINE vector float eval_z(float x, float y) { @@ -269,6 +289,20 @@ eval_z(float x, float y) } +/** Evalute coefficients to get W for four pixels in a quad */ +static INLINE vector float +eval_w(float x, float y) +{ + const uint slot = 0; + const float dwdx = setup.coef[slot].dadx.f[3]; + const float dwdy = setup.coef[slot].dady.f[3]; + const float topLeft = setup.coef[slot].a0.f[3] + x * dwdx + y * dwdy; + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy }; + return spu_add(topLeftv, derivs); +} + + /** * Emit a quad (pass to next stage). No clipping is done. * Note: about 1/5 to 1/7 of the time, mask is zero and this function @@ -292,14 +326,15 @@ emit_quad( int x, int y, mask_t mask) */ vector float inputs[4*4], outputs[2*4]; vector float fragZ = eval_z((float) x, (float) y); + vector float fragW = eval_w((float) x, (float) y); /* setup inputs */ #if 0 - eval_coeff_soa(1, (float) x, (float) y, inputs); + eval_coeff_soa(1, (float) x, (float) y, fragW, inputs); #else uint i; for (i = 0; i < spu.vertex_info.num_attribs; i++) { - eval_coeff_soa(i+1, (float) x, (float) y, inputs + i * 4); + eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4); } #endif ASSERT(spu.fragment_program); @@ -658,7 +693,6 @@ tri_linear_coeff4(uint slot) -#if 0 /** * Compute a0, dadx and dady for a perspective-corrected interpolant, * for a triangle. @@ -667,38 +701,41 @@ tri_linear_coeff4(uint slot) * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void tri_persp_coeff( unsigned slot, - unsigned i ) +static void +tri_persp_coeff4(uint slot) { - /* premultiply by 1/w: - */ - float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3]; - float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3]; - float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3]; - - float botda = mida - mina; - float majda = maxa - mina; - float a = setup.ebot.dy * majda - botda * setup.emaj.dy; - float b = setup.emaj.dx * botda - majda * setup.ebot.dx; - - /* - printf("tri persp %d,%d: %f %f %f\n", slot, i, - setup.vmin->data[slot][i], - setup.vmid->data[slot][i], - setup.vmax->data[slot][i] - ); - */ + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); + const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); + const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); - assert(slot < PIPE_MAX_SHADER_INPUTS); - assert(i <= 3); + vector float vmin_d = setup.vmin->data[slot]; + vector float vmid_d = setup.vmid->data[slot]; + vector float vmax_d = setup.vmax->data[slot]; - setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; - setup.coef[slot].dady.f[i] = b * setup.oneoverarea; - setup.coef[slot].a0.f[i] = (mina - - (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + - setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); + vmin_d = spu_mul(vmin_d, vmin_w); + vmid_d = spu_mul(vmid_d, vmid_w); + vmax_d = spu_mul(vmax_d, vmax_w); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); + setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + + setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); } -#endif + /** @@ -726,7 +763,7 @@ static void setup_tri_coefficients(void) tri_linear_coeff4(i); break; case INTERP_PERSPECTIVE: - tri_linear_coeff4(i); /* temporary */ + tri_persp_coeff4(i); break; default: ASSERT(0); -- cgit v1.2.3