summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/cell')
-rw-r--r--src/gallium/drivers/cell/common.h2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c14
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c13
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c50
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c79
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c133
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.h2
7 files changed, 181 insertions, 112 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index 1f6860da11..d5f5c7bbba 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -327,7 +327,7 @@ struct cell_command_sampler
opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */
uint unit;
struct pipe_sampler_state state;
- uint32_t pad_[1];
+ uint32_t pad_[2];
};
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 5a889a6119..312621fd53 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -1767,7 +1767,7 @@ emit_instruction(struct codegen *gen,
case TGSI_OPCODE_MAD:
return emit_MAD(gen, inst);
case TGSI_OPCODE_LERP:
- return emit_LERP(gen, inst);
+ return emit_LRP(gen, inst);
case TGSI_OPCODE_DP3:
return emit_DP3(gen, inst);
case TGSI_OPCODE_DP4:
@@ -1810,9 +1810,9 @@ emit_instruction(struct codegen *gen,
return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
case TGSI_OPCODE_POW:
return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
- case TGSI_OPCODE_EXPBASE2:
+ case TGSI_OPCODE_EX2:
return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_LG2:
return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
case TGSI_OPCODE_TEX:
/* fall-through for now */
@@ -1834,9 +1834,9 @@ emit_instruction(struct codegen *gen,
case TGSI_OPCODE_ENDIF:
return emit_ENDIF(gen, inst);
- case TGSI_OPCODE_BGNLOOP2:
+ case TGSI_OPCODE_BGNLOOP:
return emit_BGNLOOP(gen, inst);
- case TGSI_OPCODE_ENDLOOP2:
+ case TGSI_OPCODE_ENDLOOP:
return emit_ENDLOOP(gen, inst);
case TGSI_OPCODE_BRK:
return emit_BRK(gen, inst);
@@ -1875,9 +1875,9 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
assert(gen->num_imm < MAX_TEMPS);
for (ch = 0; ch < 4; ch++) {
- float val = immed->u.ImmediateFloat32[ch].Float;
+ float val = immed->u[ch].Float;
- if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) {
+ if (ch > 0 && val == immed->u[ch - 1].Float) {
/* re-use previous register */
gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
}
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index e26594448f..6a63a0e6ce 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -44,13 +44,6 @@
-static unsigned
-minify(unsigned d)
-{
- return MAX2(1, d>>1);
-}
-
-
static void
cell_texture_layout(struct cell_texture *ct)
{
@@ -424,7 +417,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
if (!ctrans->map)
return NULL; /* out of memory */
- if (transfer->usage & PIPE_TRANSFER_READ) {
+ if (transfer->usage == PIPE_TRANSFER_READ ||
+ transfer->usage == PIPE_TRANSFER_READ_WRITE) {
/* need to untwiddle the texture to make a linear version */
const uint bpp = pf_get_size(ct->base.format);
if (bpp == 4) {
@@ -465,7 +459,8 @@ cell_transfer_unmap(struct pipe_screen *screen,
PIPE_BUFFER_USAGE_CPU_READ);
}
- if (transfer->usage & PIPE_TRANSFER_WRITE) {
+ if (transfer->usage == PIPE_TRANSFER_WRITE ||
+ transfer->usage == PIPE_TRANSFER_READ_WRITE) {
/* The user wrote new texture data into the mapped buffer.
* We need to convert the new linear data into the twiddled/tiled format.
*/
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index e27df2dfb3..0eaae2e451 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -952,7 +952,6 @@ exec_instruction(
break;
case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
FETCH( &r[0], 0, CHAN_X );
r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -961,7 +960,6 @@ exec_instruction(
break;
case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
FETCH( &r[0], 0, CHAN_X );
r[0].q = micro_sqrt(r[0].q);
r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
@@ -1115,7 +1113,6 @@ exec_instruction(
break;
case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
@@ -1136,8 +1133,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
@@ -1154,25 +1150,11 @@ exec_instruction(
ASSERT (0);
break;
- case TGSI_OPCODE_CND0:
+ case TGSI_OPCODE_DP2A:
ASSERT (0);
break;
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_INDEX:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
r[0].q = micro_frc(r[0].q);
@@ -1184,8 +1166,7 @@ exec_instruction(
ASSERT (0);
break;
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
+ case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
r[0].q = micro_flr(r[0].q);
@@ -1201,8 +1182,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
@@ -1212,8 +1192,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( &r[0], 0, CHAN_X );
r[0].q = micro_lg2(r[0].q);
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -1221,8 +1200,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
@@ -1233,7 +1211,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
+ case TGSI_OPCODE_XPD:
/* TGSI_OPCODE_XPD */
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
@@ -1275,10 +1253,6 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- ASSERT (0);
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
@@ -1780,9 +1754,9 @@ exec_instruction(
mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
/* fall-through (for now) */
- case TGSI_OPCODE_BGNLOOP2:
+ case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
@@ -1790,9 +1764,9 @@ exec_instruction(
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
break;
- case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDFOR:
/* fall-through (for now at least) */
- case TGSI_OPCODE_ENDLOOP2:
+ case TGSI_OPCODE_ENDLOOP:
/* Restore ContMask, but don't pop */
ASSERT(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 7c225e2f27..5ffb7073ab 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -32,6 +32,7 @@
#include "spu_main.h"
#include "spu_render.h"
+#include "spu_shuffle.h"
#include "spu_tri.h"
#include "spu_tile.h"
#include "cell/common.h"
@@ -267,15 +268,75 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
uint drawn = 0;
- /* loop over tris */
- for (j = 0; j < render->num_indexes; j += 3) {
- const float *v0, *v1, *v2;
-
- v0 = (const float *) (vertices + indexes[j+0] * vertex_size);
- v1 = (const float *) (vertices + indexes[j+1] * vertex_size);
- v2 = (const float *) (vertices + indexes[j+2] * vertex_size);
-
- drawn += tri_draw(v0, v1, v2, tx, ty);
+ const qword vertex_sizes = (qword)spu_splats(vertex_size);
+ const qword verticess = (qword)spu_splats((uint)vertices);
+
+ ASSERT_ALIGN16(&indexes[0]);
+
+ const uint num_indexes = render->num_indexes;
+
+ /* loop over tris
+ * &indexes[0] will be 16 byte aligned. This loop is heavily unrolled
+ * avoiding variable rotates when extracting vertex indices.
+ */
+ for (j = 0; j < num_indexes; j += 24) {
+ /* Load three vectors, containing 24 ushort indices */
+ const qword* lower_qword = (qword*)&indexes[j];
+ const qword indices0 = lower_qword[0];
+ const qword indices1 = lower_qword[1];
+ const qword indices2 = lower_qword[2];
+
+ /* stores three indices for each tri n in slots 0, 1 and 2 of vsn */
+ /* Straightforward rotates for these */
+ qword vs0 = indices0;
+ qword vs1 = si_shlqbyi(indices0, 6);
+ qword vs3 = si_shlqbyi(indices1, 2);
+ qword vs4 = si_shlqbyi(indices1, 8);
+ qword vs6 = si_shlqbyi(indices2, 4);
+ qword vs7 = si_shlqbyi(indices2, 10);
+
+ /* For tri 2 and 5, the three indices are split across two machine
+ * words - rotate and combine */
+ const qword tmp2a = si_shlqbyi(indices0, 12);
+ const qword tmp2b = si_rotqmbyi(indices1, 12|16);
+ qword vs2 = si_selb(tmp2a, tmp2b, si_fsmh(si_from_uint(0x20)));
+
+ const qword tmp5a = si_shlqbyi(indices1, 14);
+ const qword tmp5b = si_rotqmbyi(indices2, 14|16);
+ qword vs5 = si_selb(tmp5a, tmp5b, si_fsmh(si_from_uint(0x60)));
+
+ /* unpack indices from halfword slots to word slots */
+ vs0 = si_shufb(vs0, vs0, SHUFB8(0,A,0,B,0,C,0,0));
+ vs1 = si_shufb(vs1, vs1, SHUFB8(0,A,0,B,0,C,0,0));
+ vs2 = si_shufb(vs2, vs2, SHUFB8(0,A,0,B,0,C,0,0));
+ vs3 = si_shufb(vs3, vs3, SHUFB8(0,A,0,B,0,C,0,0));
+ vs4 = si_shufb(vs4, vs4, SHUFB8(0,A,0,B,0,C,0,0));
+ vs5 = si_shufb(vs5, vs5, SHUFB8(0,A,0,B,0,C,0,0));
+ vs6 = si_shufb(vs6, vs6, SHUFB8(0,A,0,B,0,C,0,0));
+ vs7 = si_shufb(vs7, vs7, SHUFB8(0,A,0,B,0,C,0,0));
+
+ /* Calculate address of vertex in vertices[] */
+ vs0 = si_mpya(vs0, vertex_sizes, verticess);
+ vs1 = si_mpya(vs1, vertex_sizes, verticess);
+ vs2 = si_mpya(vs2, vertex_sizes, verticess);
+ vs3 = si_mpya(vs3, vertex_sizes, verticess);
+ vs4 = si_mpya(vs4, vertex_sizes, verticess);
+ vs5 = si_mpya(vs5, vertex_sizes, verticess);
+ vs6 = si_mpya(vs6, vertex_sizes, verticess);
+ vs7 = si_mpya(vs7, vertex_sizes, verticess);
+
+ /* Select the appropriate call based on the number of vertices
+ * remaining */
+ switch(num_indexes - j) {
+ default: drawn += tri_draw(vs7, tx, ty);
+ case 21: drawn += tri_draw(vs6, tx, ty);
+ case 18: drawn += tri_draw(vs5, tx, ty);
+ case 15: drawn += tri_draw(vs4, tx, ty);
+ case 12: drawn += tri_draw(vs3, tx, ty);
+ case 9: drawn += tri_draw(vs2, tx, ty);
+ case 6: drawn += tri_draw(vs1, tx, ty);
+ case 3: drawn += tri_draw(vs0, tx, ty);
+ }
}
//printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3);
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index d727268475..58be001be4 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -133,7 +133,15 @@ struct setup_stage {
uint tx, ty; /**< position of current tile (x, y) */
- int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
+ union {
+ struct {
+ int cliprect_minx;
+ int cliprect_miny;
+ int cliprect_maxx;
+ int cliprect_maxy;
+ };
+ qword cliprect;
+ };
struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
@@ -432,6 +440,41 @@ print_vertex(const struct vertex_header *v)
}
#endif
+/* Returns the minimum of each slot of two vec_float4s as qwords.
+ * i.e. return[n] = min(q0[n],q1[n]);
+ */
+static qword
+minfq(qword q0, qword q1)
+{
+ const qword q0q1m = si_fcgt(q0, q1);
+ return si_selb(q0, q1, q0q1m);
+}
+
+/* Returns the minimum of each slot of three vec_float4s as qwords.
+ * i.e. return[n] = min(q0[n],q1[n],q2[n]);
+ */
+static qword
+min3fq(qword q0, qword q1, qword q2)
+{
+ return minfq(minfq(q0, q1), q2);
+}
+
+/* Returns the maximum of each slot of two vec_float4s as qwords.
+ * i.e. return[n] = min(q0[n],q1[n],q2[n]);
+ */
+static qword
+maxfq(qword q0, qword q1) {
+ const qword q0q1m = si_fcgt(q0, q1);
+ return si_selb(q1, q0, q0q1m);
+}
+
+/* Returns the maximum of each slot of three vec_float4s as qwords.
+ * i.e. return[n] = min(q0[n],q1[n],q2[n]);
+ */
+static qword
+max3fq(qword q0, qword q1, qword q2) {
+ return maxfq(maxfq(q0, q1), q2);
+}
/**
* Sort vertices from top to bottom.
@@ -440,9 +483,7 @@ print_vertex(const struct vertex_header *v)
* \return FALSE if tri is totally outside tile, TRUE otherwise
*/
static boolean
-setup_sort_vertices(const struct vertex_header *v0,
- const struct vertex_header *v1,
- const struct vertex_header *v2)
+setup_sort_vertices(const qword vs)
{
float area, sign;
@@ -455,57 +496,57 @@ setup_sort_vertices(const struct vertex_header *v0,
}
#endif
- /* determine bottom to top order of vertices */
{
+ /* Load the float values for various processing... */
+ const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]);
+ const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]);
+ const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]);
+
+ /* Check if triangle is completely outside the tile bounds
+ * Find the min and max x and y positions of the three poits */
+ const qword minf = min3fq(f0, f1, f2);
+ const qword maxf = max3fq(f0, f1, f2);
+
+ /* Compare min and max against cliprect vals */
+ const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b));
+ const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0));
+
+ /* Use a little magic to work out of the tri is visible or not */
+ if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE;
+
+ /* determine bottom to top order of vertices */
/* A table of shuffle patterns for putting vertex_header pointers into
correct order. Quite magical. */
- const vec_uchar16 sort_order_patterns[] = {
- SHUFFLE4(A,B,C,C),
- SHUFFLE4(C,A,B,C),
- SHUFFLE4(A,C,B,C),
- SHUFFLE4(B,C,A,C),
- SHUFFLE4(B,A,C,C),
- SHUFFLE4(C,B,A,C) };
-
- /* The vertex_header pointers, packed for easy shuffling later */
- const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2};
+ const qword sort_order_patterns[] = {
+ SHUFB4(A,B,C,C),
+ SHUFB4(C,A,B,C),
+ SHUFB4(A,C,B,C),
+ SHUFB4(B,C,A,C),
+ SHUFB4(B,A,C,C),
+ SHUFB4(C,B,A,C) };
/* Collate y values into two vectors for comparison.
Using only one shuffle constant! ;) */
- const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C));
- const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C));
- const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C));
- const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C));
+ const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C));
+ const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C));
+ const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C));
+ const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C));
/* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
- const vec_uint4 compare = spu_cmpgt(y_012, y_120);
+ const qword compare = si_fcgt(y_012, y_120);
/* Compress the result of the comparison into 4 bits */
- const vec_uint4 gather = spu_gather(compare);
+ const qword gather = si_gb(compare);
/* Subtract one to attain the index into the LUT. Magical. */
- const unsigned int index = spu_extract(gather, 0) - 1;
+ const unsigned int index = si_to_uint(gather) - 1;
/* Load the appropriate pattern and construct the desired vector. */
- setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]);
+ setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]);
/* Using the result of the comparison, set sign.
Very magical. */
- sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f);
+ sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f);
}
- /* Check if triangle is completely outside the tile bounds */
- if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
- return FALSE;
- if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
- return FALSE;
- if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
- spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
- spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
- return FALSE;
- if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
- spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
- spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
- return FALSE;
-
setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);
@@ -761,21 +802,19 @@ subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
* The tile data should have already been fetched.
*/
boolean
-tri_draw(const float *v0, const float *v1, const float *v2,
+tri_draw(const qword vs,
uint tx, uint ty)
{
setup.tx = tx;
setup.ty = ty;
/* set clipping bounds to tile bounds */
- setup.cliprect_minx = tx * TILE_SIZE;
- setup.cliprect_miny = ty * TILE_SIZE;
- setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
- setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
-
- if (!setup_sort_vertices((struct vertex_header *) v0,
- (struct vertex_header *) v1,
- (struct vertex_header *) v2)) {
+ const qword clipbase = (qword)((vec_uint4){tx, ty});
+ const qword clipmin = si_mpyui(clipbase, TILE_SIZE);
+ const qword clipmax = si_ai(clipmin, TILE_SIZE);
+ setup.cliprect = si_shufb(clipmin, clipmax, SHUFB4(A,B,a,b));
+
+ if(!setup_sort_vertices(vs)) {
return FALSE; /* totally clipped */
}
diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h
index aa694dd7c9..82e3b19ad7 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.h
+++ b/src/gallium/drivers/cell/spu/spu_tri.h
@@ -31,7 +31,7 @@
extern boolean
-tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty);
+tri_draw(const qword vs, uint tx, uint ty);
#endif /* SPU_TRI_H */