summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/spu
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c152
-rw-r--r--src/gallium/drivers/cell/spu/spu_dcache.c4
-rw-r--r--src/gallium/drivers/cell/spu/spu_debug.h60
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.c34
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c9
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h50
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c122
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c34
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.c208
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.h34
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c162
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.h2
12 files changed, 487 insertions, 384 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index c28677ebf8..a6ed29ea63 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -44,7 +44,6 @@
#include "spu_tile.h"
#include "spu_vertex_shader.h"
#include "spu_dcache.h"
-#include "spu_debug.h"
#include "cell/common.h"
@@ -77,9 +76,10 @@ static void
release_buffer(uint buffer)
{
/* Evidently, using less than a 16-byte status doesn't work reliably */
- static const uint status[4] ALIGN16_ATTRIB
- = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
-
+ static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE,
+ CELL_BUFFER_STATUS_FREE,
+ CELL_BUFFER_STATUS_FREE,
+ CELL_BUFFER_STATUS_FREE};
const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
uint *dst = spu.init.buffer_status + index;
@@ -94,10 +94,33 @@ release_buffer(uint buffer)
}
+/**
+ * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory.
+ * There's a qword of status per SPU.
+ */
+static void
+cmd_fence(struct cell_command_fence *fence_cmd)
+{
+ static const vector unsigned int status = {CELL_FENCE_SIGNALLED,
+ CELL_FENCE_SIGNALLED,
+ CELL_FENCE_SIGNALLED,
+ CELL_FENCE_SIGNALLED};
+ uint *dst = (uint *) fence_cmd->fence;
+ dst += 4 * spu.init.id; /* main store/memory address, not local store */
+
+ mfc_put((void *) &status, /* src in local memory */
+ (unsigned int) dst, /* dst in main memory */
+ sizeof(status), /* size */
+ TAG_FENCE, /* tag */
+ 0, /* tid */
+ 0 /* rid */);
+}
+
+
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
- DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
+ D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
if (clear->surface == 0) {
spu.fb.color_clear_value = clear->value;
@@ -165,14 +188,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear)
#endif /* CLEAR_OPT */
- DEBUG_PRINTF("CLEAR SURF done\n");
+ D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n");
}
static void
cmd_release_verts(const struct cell_command_release_verts *release)
{
- DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf);
+ D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf);
ASSERT(release->vertex_buf != ~0U);
release_buffer(release->vertex_buf);
}
@@ -189,12 +212,13 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
static int warned = 0;
- DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n");
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
/* Copy SPU code from batch buffer to spu buffer */
memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
/* Copy state info (for fallback case only) */
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
+ memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));
/* Parity twist! For now, always use the fallback code by default,
* only switching to codegen when specifically requested. This
@@ -228,7 +252,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
static void
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
{
- DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n");
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n");
/* Copy SPU code from batch buffer to spu buffer */
memcpy(spu.fragment_program_code, fp->code,
SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
@@ -246,10 +270,11 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos)
const float *constants = (const float *) &buffer[pos + 2];
uint i;
- DEBUG_PRINTF("CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
/* Expand each float to float[4] for SOA execution */
for (i = 0; i < num_const; i++) {
+ D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]);
spu.constants[i] = spu_splats(constants[i]);
}
@@ -261,7 +286,7 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos)
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
- DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
+ D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
cmd->width,
cmd->height,
cmd->color_start,
@@ -309,8 +334,7 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
*/
static void
update_tex_masks(struct spu_texture *texture,
- const struct pipe_sampler_state *sampler,
- uint unit)
+ const struct pipe_sampler_state *sampler)
{
uint i;
@@ -337,11 +361,6 @@ update_tex_masks(struct spu_texture *texture,
texture->level[i].scale_t = spu_splats(1.0f);
}
}
-
- /* XXX temporary hack */
- if (texture->target == PIPE_TEXTURE_CUBE) {
- spu.sample_texture4[unit] = sample_texture4_cube;
- }
}
@@ -350,18 +369,18 @@ cmd_state_sampler(const struct cell_command_sampler *sampler)
{
uint unit = sampler->unit;
- DEBUG_PRINTF("SAMPLER [%u]\n", unit);
+ D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit);
spu.sampler[unit] = sampler->state;
switch (spu.sampler[unit].min_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
- spu.min_sample_texture4[unit] = sample_texture4_bilinear;
+ spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
break;
case PIPE_TEX_FILTER_ANISO:
/* fall-through, for now */
case PIPE_TEX_FILTER_NEAREST:
- spu.min_sample_texture4[unit] = sample_texture4_nearest;
+ spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
break;
default:
ASSERT(0);
@@ -369,12 +388,12 @@ cmd_state_sampler(const struct cell_command_sampler *sampler)
switch (spu.sampler[sampler->unit].mag_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
- spu.mag_sample_texture4[unit] = sample_texture4_bilinear;
+ spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
break;
case PIPE_TEX_FILTER_ANISO:
/* fall-through, for now */
case PIPE_TEX_FILTER_NEAREST:
- spu.mag_sample_texture4[unit] = sample_texture4_nearest;
+ spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
break;
default:
ASSERT(0);
@@ -383,16 +402,16 @@ cmd_state_sampler(const struct cell_command_sampler *sampler)
switch (spu.sampler[sampler->unit].min_mip_filter) {
case PIPE_TEX_MIPFILTER_NEAREST:
case PIPE_TEX_MIPFILTER_LINEAR:
- spu.sample_texture4[unit] = sample_texture4_lod;
+ spu.sample_texture_2d[unit] = sample_texture_2d_lod;
break;
case PIPE_TEX_MIPFILTER_NONE:
- spu.sample_texture4[unit] = spu.mag_sample_texture4[unit];
+ spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit];
break;
default:
ASSERT(0);
}
- update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit);
+ update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
}
@@ -402,9 +421,7 @@ cmd_state_texture(const struct cell_command_texture *texture)
const uint unit = texture->unit;
uint i;
- //if (spu.init.id==0) Debug=1;
-
- DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit);
+ D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit);
spu.texture[unit].max_level = 0;
spu.texture[unit].target = texture->target;
@@ -414,7 +431,7 @@ cmd_state_texture(const struct cell_command_texture *texture)
uint height = texture->height[i];
uint depth = texture->depth[i];
- DEBUG_PRINTF(" LEVEL %u: at %p size[0] %u x %u\n", i,
+ D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i,
texture->start[i], texture->width[i], texture->height[i]);
spu.texture[unit].level[i].start = texture->start[i];
@@ -435,16 +452,14 @@ cmd_state_texture(const struct cell_command_texture *texture)
spu.texture[unit].max_level = i;
}
- update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit);
-
- //Debug=0;
+ update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
}
static void
cmd_state_vertex_info(const struct vertex_info *vinfo)
{
- DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
+ D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
ASSERT(vinfo->num_attribs >= 1);
ASSERT(vinfo->num_attribs <= 8);
memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
@@ -483,7 +498,7 @@ cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
static void
cmd_finish(void)
{
- DEBUG_PRINTF("FINISH\n");
+ D_PRINTF(CELL_DEBUG_CMD, "FINISH\n");
really_clear_tiles(0);
/* wait for all outstanding DMAs to finish */
mfc_write_tag_mask(~0);
@@ -508,7 +523,7 @@ cmd_batch(uint opcode)
const unsigned usize = size / sizeof(buffer[0]);
uint pos;
- DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n",
+ D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n",
buf, size, spu.init.buffers[buf]);
ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
@@ -528,7 +543,7 @@ cmd_batch(uint opcode)
wait_on_mask(1 << TAG_BATCH_BUFFER);
/* Tell PPU we're done copying the buffer to local store */
- DEBUG_PRINTF("release batch buf %u\n", buf);
+ D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf);
release_buffer(buf);
/*
@@ -586,6 +601,14 @@ cmd_batch(uint opcode)
case CELL_CMD_STATE_FS_CONSTANTS:
pos = cmd_state_fs_constants(buffer, pos);
break;
+ case CELL_CMD_STATE_RASTERIZER:
+ {
+ struct cell_command_rasterizer *rast =
+ (struct cell_command_rasterizer *) &buffer[pos];
+ spu.rasterizer = rast->rasterizer;
+ pos += sizeof(*rast) / 8;
+ }
+ break;
case CELL_CMD_STATE_SAMPLER:
{
struct cell_command_sampler *sampler
@@ -638,6 +661,14 @@ cmd_batch(uint opcode)
cmd_finish();
pos += 1;
break;
+ case CELL_CMD_FENCE:
+ {
+ struct cell_command_fence *fence_cmd =
+ (struct cell_command_fence *) &buffer[pos];
+ cmd_fence(fence_cmd);
+ pos += sizeof(*fence_cmd) / 8;
+ }
+ break;
case CELL_CMD_RELEASE_VERTS:
{
struct cell_command_release_verts *release
@@ -661,10 +692,12 @@ cmd_batch(uint opcode)
}
}
- DEBUG_PRINTF("BATCH complete\n");
+ D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n");
}
+#define PERF 0
+
/**
* Main loop for SPEs: Get a command, execute it, repeat.
@@ -672,41 +705,29 @@ cmd_batch(uint opcode)
void
command_loop(void)
{
- struct cell_command cmd;
int exitFlag = 0;
+ uint t0, t1;
- DEBUG_PRINTF("Enter command loop\n");
-
- ASSERT((sizeof(struct cell_command) & 0xf) == 0);
- ASSERT_ALIGN16(&cmd);
+ D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
while (!exitFlag) {
unsigned opcode;
- int tag = 0;
- DEBUG_PRINTF("Wait for cmd...\n");
+ D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
+
+ if (PERF)
+ spu_write_decrementer(~0);
/* read/wait from mailbox */
opcode = (unsigned int) spu_read_in_mbox();
+ D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
- DEBUG_PRINTF("got cmd 0x%x\n", opcode);
-
- /* command payload */
- mfc_get(&cmd, /* dest */
- (unsigned int) spu.init.cmd, /* src */
- sizeof(struct cell_command), /* bytes */
- tag,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask( 1 << tag );
-
- /*
- * NOTE: most commands should be contained in a batch buffer
- */
+ if (PERF)
+ t0 = spu_read_decrementer();
switch (opcode & CELL_CMD_OPCODE_MASK) {
case CELL_CMD_EXIT:
- DEBUG_PRINTF("EXIT\n");
+ D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
exitFlag = 1;
break;
case CELL_CMD_VS_EXECUTE:
@@ -721,9 +742,16 @@ command_loop(void)
printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
}
+ if (PERF) {
+ t1 = spu_read_decrementer();
+ printf("wait mbox time: %gms batch time: %gms\n",
+ (~0u - t0) * spu.init.inv_timebase,
+ (t0 - t1) * spu.init.inv_timebase);
+ }
}
- DEBUG_PRINTF("Exit command loop\n");
+ D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
- spu_dcache_report();
+ if (spu.init.debug_flags & CELL_DEBUG_CACHE)
+ spu_dcache_report();
}
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c
index 167404cdc5..a6d67634fd 100644
--- a/src/gallium/drivers/cell/spu/spu_dcache.c
+++ b/src/gallium/drivers/cell/spu/spu_dcache.c
@@ -36,7 +36,9 @@
#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0)
#define CACHE_LOG2NNWAY 2
#define CACHE_LOG2NSETS 6
-/*#define CACHE_STATS 1*/
+#ifdef DEBUG
+#define CACHE_STATS 1
+#endif
#include <cache-api.h>
/* Yes folks, this is ugly.
diff --git a/src/gallium/drivers/cell/spu/spu_debug.h b/src/gallium/drivers/cell/spu/spu_debug.h
deleted file mode 100644
index eeec052655..0000000000
--- a/src/gallium/drivers/cell/spu/spu_debug.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef SPU_DEBUG_H
-#define SPU_DEBUG_H
-
-
-/* Set to 0 to disable all extraneous debugging code */
-#define DEBUG 1
-
-#if DEBUG
-extern boolean Debug;
-extern boolean force_fragment_ops_fallback;
-
-/* These debug macros use the unusual construction ", ##__VA_ARGS__"
- * which expands to the expected comma + args if variadic arguments
- * are supplied, but swallows the comma if there are no variadic
- * arguments (which avoids syntax errors that would otherwise occur).
- */
-#define DEBUG_PRINTF(format,...) \
- if (Debug) \
- printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
-#define D_PRINTF(flag, format,...) \
- if (spu.init.debug_flags & (flag)) \
- printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
-
-#else
-
-#define DEBUG_PRINTF(...)
-#define D_PRINTF(...)
-
-#endif
-
-
-#endif /* SPU_DEBUG_H */
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
index 5c3ee305d4..3534b35000 100644
--- a/src/gallium/drivers/cell/spu/spu_funcs.c
+++ b/src/gallium/drivers/cell/spu/spu_funcs.c
@@ -43,6 +43,7 @@
#include "cell/common.h"
#include "spu_main.h"
#include "spu_funcs.h"
+#include "spu_texture.h"
/** For "return"-ing four vectors */
@@ -102,11 +103,34 @@ spu_log2(vector float x)
static struct vec_4x4
-spu_txp(vector float s, vector float t, vector float r, vector float q,
- unsigned unit)
+spu_tex_2d(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
{
struct vec_4x4 colors;
- spu.sample_texture4[unit](s, t, r, q, unit, 0, 0, colors.v);
+ (void) r;
+ (void) q;
+ spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
+ return colors;
+}
+
+static struct vec_4x4
+spu_tex_3d(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
+{
+ struct vec_4x4 colors;
+ (void) r;
+ (void) q;
+ spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
+ return colors;
+}
+
+static struct vec_4x4
+spu_tex_cube(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
+{
+ struct vec_4x4 colors;
+ (void) q;
+ sample_texture_cube(s, t, r, unit, colors.v);
return colors;
}
@@ -147,7 +171,9 @@ return_function_info(void)
export_func(&funcs, "spu_pow", &spu_pow);
export_func(&funcs, "spu_exp2", &spu_exp2);
export_func(&funcs, "spu_log2", &spu_log2);
- export_func(&funcs, "spu_txp", &spu_txp);
+ export_func(&funcs, "spu_tex_2d", &spu_tex_2d);
+ export_func(&funcs, "spu_tex_3d", &spu_tex_3d);
+ export_func(&funcs, "spu_tex_cube", &spu_tex_cube);
/* Send the function info back to the PPU / main memory */
mfc_put((void *) &funcs, /* src in local store */
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 4becd0f92a..c8bb251905 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -40,7 +40,6 @@
#include "spu_per_fragment_op.h"
#include "spu_texture.h"
//#include "spu_test.h"
-#include "spu_debug.h"
#include "cell/common.h"
@@ -53,12 +52,6 @@ helpful headers:
struct spu_global spu;
-#if DEBUG
-boolean Debug = FALSE;
-boolean force_fragment_ops_fallback = TRUE;
-#endif
-
-
static void
one_time_init(void)
{
@@ -102,7 +95,7 @@ main(main_param_t speid, main_param_t argp)
one_time_init();
- DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid);
+ D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid);
D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
/* get initialization data */
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index eff43b870c..668af10be2 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -36,12 +36,18 @@
#include "pipe/p_state.h"
-
-#define MAX_WIDTH 1024
-#define MAX_HEIGHT 1024
-
-
-#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */
+#if DEBUG
+/* These debug macros use the unusual construction ", ##__VA_ARGS__"
+ * which expands to the expected comma + args if variadic arguments
+ * are supplied, but swallows the comma if there are no variadic
+ * arguments (which avoids syntax errors that would otherwise occur).
+ */
+#define D_PRINTF(flag, format,...) \
+ if (spu.init.debug_flags & (flag)) \
+ printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
+#else
+#define D_PRINTF(...)
+#endif
/**
@@ -64,12 +70,10 @@ typedef union {
/** Function for sampling textures */
-typedef void (*spu_sample_texture4_func)(vector float s,
- vector float t,
- vector float r,
- vector float q,
- uint unit, uint level, uint face,
- vector float colors[4]);
+typedef void (*spu_sample_texture_2d_func)(vector float s,
+ vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
/** Function for performing per-fragment ops */
@@ -85,9 +89,9 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
uint facing);
/** Function for running fragment program */
-typedef void (*spu_fragment_program_func)(vector float *inputs,
- vector float *outputs,
- vector float *constants);
+typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
+ vector float *outputs,
+ vector float *constants);
struct spu_framebuffer
@@ -145,7 +149,9 @@ struct spu_global
struct spu_framebuffer fb;
struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
struct pipe_blend_state blend;
+ struct pipe_blend_color blend_color;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_rasterizer_state rasterizer;
struct spu_texture texture[PIPE_MAX_SAMPLERS];
struct vertex_info vertex_info;
@@ -161,8 +167,8 @@ struct spu_global
ubyte cur_ctile_status, cur_ztile_status;
/** Status of all tiles in framebuffer */
- ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
- ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
/** Current fragment ops machine code, at 8-byte boundary */
uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB;
@@ -175,9 +181,9 @@ struct spu_global
spu_fragment_program_func fragment_program;
/** Current texture sampler function */
- spu_sample_texture4_func sample_texture4[CELL_MAX_SAMPLERS];
- spu_sample_texture4_func min_sample_texture4[CELL_MAX_SAMPLERS];
- spu_sample_texture4_func mag_sample_texture4[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS];
/** Fragment program constants */
vector float constants[4 * CELL_MAX_CONSTANTS];
@@ -186,8 +192,6 @@ struct spu_global
extern struct spu_global spu;
-extern boolean Debug;
-
@@ -206,7 +210,7 @@ extern boolean Debug;
#define TAG_DCACHE1 21
#define TAG_DCACHE2 22
#define TAG_DCACHE3 23
-
+#define TAG_FENCE 24
static INLINE void
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index d252fa6dc1..f8ffc70492 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -40,6 +40,24 @@
#define LINEAR_QUAD_LAYOUT 1
+static INLINE vector float
+spu_min(vector float a, vector float b)
+{
+ vector unsigned int m;
+ m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
+ return spu_sel(a, b, m);
+}
+
+
+static INLINE vector float
+spu_max(vector float a, vector float b)
+{
+ vector unsigned int m;
+ m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
+ return spu_sel(b, a, m);
+}
+
+
/**
* Called by rasterizer for each quad after the shader has run. Do
* all the per-fragment operations including alpha test, z test,
@@ -242,7 +260,7 @@ spu_fallback_fragment_ops(uint x, uint y,
}
/*
- * Compute Src RGB terms
+ * Compute Src RGB terms (fragment color * factor)
*/
switch (spu.blend.rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
@@ -265,13 +283,33 @@ spu_fallback_fragment_ops(uint x, uint y,
term1g = spu_mul(fragG, fragA);
term1b = spu_mul(fragB, fragA);
break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ term1r = spu_mul(fragR, fbRGBA[0]);
+ term1g = spu_mul(fragG, fbRGBA[1]);
+ term1b = spu_mul(fragB, fbRGBA[1]);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term1r = spu_mul(fragR, fbRGBA[3]);
+ term1g = spu_mul(fragG, fbRGBA[3]);
+ term1b = spu_mul(fragB, fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0]));
+ term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1]));
+ term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2]));
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
+ term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3]));
+ term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3]));
+ break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
- * Compute Src Alpha term
+ * Compute Src Alpha term (fragment alpha * factor)
*/
switch (spu.blend.alpha_src_factor) {
case PIPE_BLENDFACTOR_ONE:
@@ -283,19 +321,29 @@ spu_fallback_fragment_ops(uint x, uint y,
case PIPE_BLENDFACTOR_SRC_ALPHA:
term1a = spu_mul(fragA, fragA);
break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term1a = spu_mul(fragA, fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
+ break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
- * Compute Dest RGB terms
+ * Compute Dest RGB terms (framebuffer color * factor)
*/
switch (spu.blend.rgb_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
- term2r = fragR;
- term2g = fragG;
- term2b = fragB;
+ term2r = fbRGBA[0];
+ term2g = fbRGBA[1];
+ term2b = fbRGBA[2];
break;
case PIPE_BLENDFACTOR_ZERO:
term2r =
@@ -319,17 +367,37 @@ spu_fallback_fragment_ops(uint x, uint y,
term2g = spu_mul(fbRGBA[1], tmp);
term2b = spu_mul(fbRGBA[2], tmp);
break;
- /* XXX more cases */
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ term2r = spu_mul(fbRGBA[0], fbRGBA[0]);
+ term2g = spu_mul(fbRGBA[1], fbRGBA[1]);
+ term2b = spu_mul(fbRGBA[2], fbRGBA[2]);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term2r = spu_mul(fbRGBA[0], fbRGBA[3]);
+ term2g = spu_mul(fbRGBA[1], fbRGBA[3]);
+ term2b = spu_mul(fbRGBA[2], fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0]));
+ term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1]));
+ term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2]));
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3]));
+ term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3]));
+ term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3]));
+ break;
+ /* XXX more cases */
default:
ASSERT(0);
}
/*
- * Compute Dest Alpha term
+ * Compute Dest Alpha term (framebuffer alpha * factor)
*/
switch (spu.blend.alpha_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
- term2a = fragA;
+ term2a = fbRGBA[3];
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
term2a = spu_splats(0.0f);
@@ -342,6 +410,16 @@ spu_fallback_fragment_ops(uint x, uint y,
tmp = spu_sub(one, fragA);
term2a = spu_mul(fbRGBA[3], tmp);
break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term2a = spu_mul(fbRGBA[3], fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3]));
+ break;
/* XXX more cases */
default:
ASSERT(0);
@@ -361,7 +439,21 @@ spu_fallback_fragment_ops(uint x, uint y,
fragG = spu_sub(term1g, term2g);
fragB = spu_sub(term1b, term2b);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ fragR = spu_sub(term2r, term1r);
+ fragG = spu_sub(term2g, term1g);
+ fragB = spu_sub(term2b, term1b);
+ break;
+ case PIPE_BLEND_MIN:
+ fragR = spu_min(term1r, term2r);
+ fragG = spu_min(term1g, term2g);
+ fragB = spu_min(term1b, term2b);
+ break;
+ case PIPE_BLEND_MAX:
+ fragR = spu_max(term1r, term2r);
+ fragG = spu_max(term1g, term2g);
+ fragB = spu_max(term1b, term2b);
+ break;
default:
ASSERT(0);
}
@@ -376,7 +468,15 @@ spu_fallback_fragment_ops(uint x, uint y,
case PIPE_BLEND_SUBTRACT:
fragA = spu_sub(term1a, term2a);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ fragA = spu_sub(term2a, term1a);
+ break;
+ case PIPE_BLEND_MIN:
+ fragA = spu_min(term1a, term2a);
+ break;
+ case PIPE_BLEND_MAX:
+ fragA = spu_max(term1a, term2a);
+ break;
default:
ASSERT(0);
}
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 82dbeb26b7..5515bb55c9 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -175,22 +175,14 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
const ubyte *vertices;
const ushort *indexes;
uint i, j;
+ uint num_tiles;
-
- if (Debug) {
- printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u "
- "inline_vert=%u\n",
- spu.init.id,
- render->prim_type,
- render->num_verts,
- render->num_indexes,
- render->inline_verts);
-
- /*
- printf(" bound: %g, %g .. %g, %g\n",
- render->xmin, render->ymin, render->xmax, render->ymax);
- */
- }
+ D_PRINTF(CELL_DEBUG_CMD,
+ "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n",
+ render->prim_type,
+ render->num_verts,
+ render->num_indexes,
+ render->inline_verts);
ASSERT(sizeof(*render) % 4 == 0);
ASSERT(total_vertex_bytes % 16 == 0);
@@ -251,6 +243,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
+ num_tiles = 0;
+
/**
** loop over tiles, rendering tris
**/
@@ -264,6 +258,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
if (!my_tile(tx, ty))
continue;
+ num_tiles++;
+
spu.cur_ctile_status = spu.ctile_status[ty][tx];
spu.cur_ztile_status = spu.ztile_status[ty][tx];
@@ -279,7 +275,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
v1 = (const float *) (vertices + indexes[j+1] * vertex_size);
v2 = (const float *) (vertices + indexes[j+2] * vertex_size);
- drawn += tri_draw(v0, v1, v2, tx, ty, render->front_winding);
+ drawn += tri_draw(v0, v1, v2, tx, ty);
}
//printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3);
@@ -293,7 +289,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
spu.ztile_status[ty][tx] = spu.cur_ztile_status;
}
- if (Debug)
- printf("SPU %u: RENDER done\n",
- spu.init.id);
+ D_PRINTF(CELL_DEBUG_CMD,
+ "RENDER done (%u tiles hit)\n",
+ num_tiles);
}
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c
index 42eb06a362..69784c8978 100644
--- a/src/gallium/drivers/cell/spu/spu_texture.c
+++ b/src/gallium/drivers/cell/spu/spu_texture.c
@@ -72,10 +72,10 @@ invalidate_tex_cache(void)
* a time.
*/
static void
-get_four_texels(uint unit, uint level, uint face, vec_int4 x, vec_int4 y,
+get_four_texels(const struct spu_texture_level *tlevel, uint face,
+ vec_int4 x, vec_int4 y,
vec_uint4 *texels)
{
- const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
unsigned texture_ea = (uintptr_t) tlevel->start;
const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
@@ -126,10 +126,9 @@ spu_clamp(vector signed int vec, vector signed int max)
* \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
*/
void
-sample_texture4_nearest(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, uint level, uint face,
- vector float colors[4])
+sample_texture_2d_nearest(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
vector float ss = spu_mul(s, tlevel->scale_s);
@@ -146,7 +145,7 @@ sample_texture4_nearest(vector float s, vector float t,
is = spu_clamp(is, tlevel->max_s);
it = spu_clamp(it, tlevel->max_t);
- get_four_texels(unit, level, face, is, it, texels);
+ get_four_texels(tlevel, face, is, it, texels);
/* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
spu_unpack_A8R8G8B8_transpose4(texels, colors);
@@ -158,10 +157,9 @@ sample_texture4_nearest(vector float s, vector float t,
* \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
*/
void
-sample_texture4_bilinear(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, uint level, uint face,
- vector float colors[4])
+sample_texture_2d_bilinear(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
@@ -190,14 +188,10 @@ sample_texture4_bilinear(vector float s, vector float t,
/* get packed int texels */
vector unsigned int texels[16];
- get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */
- get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */
- get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */
- get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */
-
- /* XXX possibly rework following code to compute the weighted sample
- * colors with integer arithmetic for fewer int->float conversions.
- */
+ get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
/* convert packed int texels to float colors */
vector float ftexels[16];
@@ -305,13 +299,13 @@ transpose(vector unsigned int *mOut0,
/**
- * Bilinear filtering, using int intead of float arithmetic
+ * Bilinear filtering, using int instead of float arithmetic for computing
+ * sample weights.
*/
void
-sample_texture4_bilinear_2(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, uint level, uint face,
- vector float colors[4])
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
@@ -320,19 +314,19 @@ sample_texture4_bilinear_2(vector float s, vector float t,
vector float ss = spu_madd(s, tlevel->scale_s, half);
vector float tt = spu_madd(t, tlevel->scale_t, half);
- /* convert float coords to fixed-pt coords with 8 fraction bits */
- vector signed int is = spu_convts(ss, 8);
- vector signed int it = spu_convts(tt, 8);
+ /* convert float coords to fixed-pt coords with 7 fraction bits */
+ vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */
+ vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */
- /* compute integer texel weights in [0, 255] */
- vector signed int sWeights0 = spu_and(is, 255);
- vector signed int tWeights0 = spu_and(it, 255);
- vector signed int sWeights1 = spu_sub(255, sWeights0);
- vector signed int tWeights1 = spu_sub(255, tWeights0);
+ /* compute integer texel weights in [0, 127] */
+ vector signed int sWeights0 = spu_and(is, 127);
+ vector signed int tWeights0 = spu_and(it, 127);
+ vector signed int sWeights1 = spu_sub(127, sWeights0);
+ vector signed int tWeights1 = spu_sub(127, tWeights0);
- /* texel coords: is0 = is / 256, it0 = is / 256 */
- vector signed int is0 = spu_rlmask(is, -8);
- vector signed int it0 = spu_rlmask(it, -8);
+ /* texel coords: is0 = is / 128, it0 = is / 128 */
+ vector signed int is0 = spu_rlmask(is, -7);
+ vector signed int it0 = spu_rlmask(it, -7);
/* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
vector signed int is1 = spu_add(is0, 1);
@@ -352,10 +346,10 @@ sample_texture4_bilinear_2(vector float s, vector float t,
/* get packed int texels */
vector unsigned int texels[16];
- get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */
- get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */
- get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */
- get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */
+ get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
/* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
{
@@ -383,36 +377,36 @@ sample_texture4_bilinear_2(vector float s, vector float t,
vector unsigned int c0, c1, c2, c3, cSum;
/* red */
- c0 = (vector unsigned int) si_mpyu((qword) texel0, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpyu((qword) texel4, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpyu((qword) texel8, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpyu((qword) texel12, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[0] = spu_convtf(cSum, 24);
+ colors[0] = spu_convtf(cSum, 22);
/* green */
- c0 = (vector unsigned int) si_mpyu((qword) texel1, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpyu((qword) texel5, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpyu((qword) texel9, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpyu((qword) texel13, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[1] = spu_convtf(cSum, 24);
+ colors[1] = spu_convtf(cSum, 22);
/* blue */
- c0 = (vector unsigned int) si_mpyu((qword) texel2, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpyu((qword) texel6, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpyu((qword) texel10, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpyu((qword) texel14, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[2] = spu_convtf(cSum, 24);
+ colors[2] = spu_convtf(cSum, 22);
/* alpha */
- c0 = (vector unsigned int) si_mpyu((qword) texel3, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpyu((qword) texel7, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpyu((qword) texel11, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpyu((qword) texel15, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[3] = spu_convtf(cSum, 24);
+ colors[3] = spu_convtf(cSum, 22);
}
@@ -420,8 +414,8 @@ sample_texture4_bilinear_2(vector float s, vector float t,
/**
* Compute level of detail factor from texcoords.
*/
-static float
-compute_lambda(uint unit, vector float s, vector float t)
+static INLINE float
+compute_lambda_2d(uint unit, vector float s, vector float t)
{
uint baseLevel = 0;
float width = spu.texture[unit].level[baseLevel].width;
@@ -430,30 +424,60 @@ compute_lambda(uint unit, vector float s, vector float t)
float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
+#if 0
+ /* ideal value */
float x = dsdx * dsdx + dtdx * dtdx;
float y = dsdy * dsdy + dtdy * dtdy;
float rho = x > y ? x : y;
rho = sqrtf(rho);
- float lambda = logf(rho) * 1.442695f;
+#else
+ /* approximation */
+ dsdx = fabsf(dsdx);
+ dsdy = fabsf(dsdy);
+ dtdx = fabsf(dtdx);
+ dtdy = fabsf(dtdy);
+ float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5;
+#endif
+ float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
return lambda;
}
+/**
+ * Blend two sets of colors according to weight.
+ */
+static void
+blend_colors(vector float c0[4], const vector float c1[4], float weight)
+{
+ vector float t = spu_splats(weight);
+ vector float dc0 = spu_sub(c1[0], c0[0]);
+ vector float dc1 = spu_sub(c1[1], c0[1]);
+ vector float dc2 = spu_sub(c1[2], c0[2]);
+ vector float dc3 = spu_sub(c1[3], c0[3]);
+ c0[0] = spu_madd(dc0, t, c0[0]);
+ c0[1] = spu_madd(dc1, t, c0[1]);
+ c0[2] = spu_madd(dc2, t, c0[2]);
+ c0[3] = spu_madd(dc3, t, c0[3]);
+}
+
/**
- * Texture sampling with level of detail selection.
+ * Texture sampling with level of detail selection and possibly mipmap
+ * interpolation.
*/
void
-sample_texture4_lod(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, uint level_ignored, uint face,
- vector float colors[4])
+sample_texture_2d_lod(vector float s, vector float t,
+ uint unit, uint level_ignored, uint face,
+ vector float colors[4])
{
/*
* Note that we're computing a lambda/lod here that's used for all
* four pixels in the quad.
*/
- float lambda = compute_lambda(unit, s, t);
+ float lambda = compute_lambda_2d(unit, s, t);
+
+ (void) face;
+ (void) level_ignored;
/* apply lod bias */
lambda += spu.sampler[unit].lod_bias;
@@ -466,15 +490,34 @@ sample_texture4_lod(vector float s, vector float t,
if (lambda <= 0.0f) {
/* magnify */
- spu.mag_sample_texture4[unit](s, t, r, q, unit, 0, 0, colors);
+ spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors);
}
else {
/* minify */
- int level = (int) (lambda + 0.5f);
- if (level > (int) spu.texture[unit].max_level)
- level = spu.texture[unit].max_level;
- spu.min_sample_texture4[unit](s, t, r, q, unit, level, 0, colors);
- /* XXX to do: mipmap level interpolation */
+ if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
+ /* sample two mipmap levels and interpolate */
+ int level = (int) lambda;
+ if (level > (int) spu.texture[unit].max_level)
+ level = spu.texture[unit].max_level;
+ spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+ if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
+ /* sample second mipmap level */
+ float weight = lambda - (float) level;
+ level++;
+ if (level <= (int) spu.texture[unit].max_level) {
+ vector float colors2[4];
+ spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2);
+ blend_colors(colors, colors2, weight);
+ }
+ }
+ }
+ else {
+ /* sample one mipmap level */
+ int level = (int) (lambda + 0.5f);
+ if (level > (int) spu.texture[unit].max_level)
+ level = spu.texture[unit].max_level;
+ spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+ }
}
}
@@ -552,16 +595,13 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
void
-sample_texture4_cube(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, uint level, uint face_ignored,
- vector float colors[4])
+sample_texture_cube(vector float s, vector float t, vector float r,
+ uint unit, vector float colors[4])
{
- static const vector float zero = {0.0f, 0.0f, 0.0f, 0.0f};
- uint p, faces[4];
+ uint p, faces[4], level = 0;
float newS[4], newT[4];
- /* Compute cube face referenced by the four sets of texcoords.
+ /* Compute cube faces referenced by the four sets of texcoords.
* XXX we should SIMD-ize this.
*/
for (p = 0; p < 4; p++) {
@@ -577,15 +617,15 @@ sample_texture4_cube(vector float s, vector float t,
/* GOOD! All four texcoords refer to the same cube face */
s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
- sample_texture4_nearest(s, t, zero, zero, unit, level, faces[0], colors);
+ spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
}
else {
/* BAD! The four texcoords refer to different faces */
for (p = 0; p < 4; p++) {
vector float c[4];
- sample_texture4_nearest(spu_splats(newS[p]), spu_splats(newT[p]),
- zero, zero, unit, level, faces[p], c);
+ spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
+ unit, level, faces[p], c);
float red = spu_extract(c[0], p);
float green = spu_extract(c[1], p);
diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h
index 387484c3ad..7b75b007b5 100644
--- a/src/gallium/drivers/cell/spu/spu_texture.h
+++ b/src/gallium/drivers/cell/spu/spu_texture.h
@@ -37,37 +37,31 @@ invalidate_tex_cache(void);
extern void
-sample_texture4_nearest(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, uint level, uint face,
- vector float colors[4]);
+sample_texture_2d_nearest(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
extern void
-sample_texture4_bilinear(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, uint level, uint face,
- vector float colors[4]);
-
-extern void
-sample_texture4_bilinear_2(vector float s, vector float t,
- vector float r, vector float q,
+sample_texture_2d_bilinear(vector float s, vector float t,
uint unit, uint level, uint face,
vector float colors[4]);
+extern void
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
extern void
-sample_texture4_lod(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, uint level, uint face,
- vector float colors[4]);
+sample_texture_2d_lod(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
extern void
-sample_texture4_cube(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, uint level_ignored, uint face_ignored,
- vector float colors[4]);
+sample_texture_cube(vector float s, vector float t, vector float r,
+ uint unit, vector float colors[4]);
#endif /* SPU_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 03f094373d..4caf7d6b61 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -43,11 +43,6 @@
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
typedef vector unsigned int mask_t;
-typedef union
-{
- vector float v;
- float f[4];
-} float4;
/**
@@ -91,9 +86,9 @@ struct edge {
struct interp_coef
{
- float4 a0;
- float4 dadx;
- float4 dady;
+ vector float a0;
+ vector float dadx;
+ vector float dady;
};
@@ -116,7 +111,7 @@ struct setup_stage {
struct edge etop;
struct edge emaj;
- float oneOverArea;
+ float oneOverArea; /* XXX maybe make into vector? */
uint facing;
@@ -152,14 +147,14 @@ eval_coeff(uint slot, float x, float y, vector float w, vector float result[4])
result[QUAD_TOP_LEFT] =
result[QUAD_TOP_RIGHT] =
result[QUAD_BOTTOM_LEFT] =
- result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v;
+ result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0;
break;
case INTERP_LINEAR:
{
- vector float dadx = setup.coef[slot].dadx.v;
- vector float dady = setup.coef[slot].dady.v;
+ vector float dadx = setup.coef[slot].dadx;
+ vector float dady = setup.coef[slot].dady;
vector float topLeft =
- spu_add(setup.coef[slot].a0.v,
+ spu_add(setup.coef[slot].a0,
spu_add(spu_mul(spu_splats(x), dadx),
spu_mul(spu_splats(y), dady)));
@@ -171,10 +166,10 @@ eval_coeff(uint slot, float x, float y, vector float w, vector float result[4])
break;
case INTERP_PERSPECTIVE:
{
- vector float dadx = setup.coef[slot].dadx.v;
- vector float dady = setup.coef[slot].dady.v;
+ vector float dadx = setup.coef[slot].dadx;
+ vector float dady = setup.coef[slot].dady;
vector float topLeft =
- spu_add(setup.coef[slot].a0.v,
+ spu_add(setup.coef[slot].a0,
spu_add(spu_mul(spu_splats(x), dadx),
spu_mul(spu_splats(y), dady)));
@@ -212,9 +207,9 @@ static INLINE vector float
eval_z(float x, float y)
{
const uint slot = 0;
- const float dzdx = setup.coef[slot].dadx.f[2];
- const float dzdy = setup.coef[slot].dady.f[2];
- const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
+ const float dzdx = spu_extract(setup.coef[slot].dadx, 2);
+ const float dzdy = spu_extract(setup.coef[slot].dady, 2);
+ const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy;
const vector float topLeftv = spu_splats(topLeft);
const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
return spu_add(topLeftv, derivs);
@@ -226,9 +221,9 @@ static INLINE vector float
eval_w(float x, float y)
{
const uint slot = 0;
- const float dwdx = setup.coef[slot].dadx.f[3];
- const float dwdy = setup.coef[slot].dady.f[3];
- const float topLeft = setup.coef[slot].a0.f[3] + x * dwdx + y * dwdy;
+ const float dwdx = spu_extract(setup.coef[slot].dadx, 3);
+ const float dwdy = spu_extract(setup.coef[slot].dady, 3);
+ const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy;
const vector float topLeftv = spu_splats(topLeft);
const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy };
return spu_add(topLeftv, derivs);
@@ -259,6 +254,7 @@ emit_quad( int x, int y, mask_t mask)
vector float inputs[4*4], outputs[2*4];
vector float fragZ = eval_z((float) x, (float) y);
vector float fragW = eval_w((float) x, (float) y);
+ vector unsigned int kill_mask;
/* setup inputs */
#if 0
@@ -273,7 +269,9 @@ emit_quad( int x, int y, mask_t mask)
ASSERT(spu.fragment_ops);
/* Execute the current fragment program */
- spu.fragment_program(inputs, outputs, spu.constants);
+ kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
+
+ mask = spu_andc(mask, kill_mask);
/* Execute per-fragment/quad operations, including:
* alpha test, z test, stencil test, blend and framebuffer writing.
@@ -404,30 +402,41 @@ flush_spans(void)
static void
print_vertex(const struct vertex_header *v)
{
- int i;
- fprintf(stderr, "Vertex: (%p)\n", v);
- for (i = 0; i < setup.quad.nr_attrs; i++) {
- fprintf(stderr, " %d: %f %f %f %f\n", i,
- v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
+ uint i;
+ fprintf(stderr, " Vertex: (%p)\n", v);
+ for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+ fprintf(stderr, " %d: %f %f %f %f\n", i,
+ spu_extract(v->data[i], 0),
+ spu_extract(v->data[i], 1),
+ spu_extract(v->data[i], 2),
+ spu_extract(v->data[i], 3));
}
}
#endif
+/**
+ * Sort vertices from top to bottom.
+ * Compute area and determine front vs. back facing.
+ * Do coarse clip test against tile bounds
+ * \return FALSE if tri is totally outside tile, TRUE otherwise
+ */
static boolean
setup_sort_vertices(const struct vertex_header *v0,
const struct vertex_header *v1,
const struct vertex_header *v2)
{
+ float area, sign;
+
#if DEBUG_VERTS
- fprintf(stderr, "Triangle:\n");
- print_vertex(v0);
- print_vertex(v1);
- print_vertex(v2);
+ if (spu.init.id==0) {
+ fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
+ print_vertex(v0);
+ print_vertex(v1);
+ print_vertex(v2);
+ }
#endif
- setup.vprovoke = v2;
-
/* determine bottom to top order of vertices */
{
float y0 = spu_extract(v0->data[0], 1);
@@ -439,18 +448,21 @@ setup_sort_vertices(const struct vertex_header *v0,
setup.vmin = v0;
setup.vmid = v1;
setup.vmax = v2;
+ sign = -1.0f;
}
else if (y2 <= y0) {
/* y2<=y0<=y1 */
setup.vmin = v2;
setup.vmid = v0;
setup.vmax = v1;
+ sign = -1.0f;
}
else {
/* y0<=y2<=y1 */
setup.vmin = v0;
setup.vmid = v2;
setup.vmax = v1;
+ sign = 1.0f;
}
}
else {
@@ -459,18 +471,21 @@ setup_sort_vertices(const struct vertex_header *v0,
setup.vmin = v1;
setup.vmid = v0;
setup.vmax = v2;
+ sign = 1.0f;
}
else if (y2 <= y1) {
/* y2<=y1<=y0 */
setup.vmin = v2;
setup.vmid = v1;
setup.vmax = v0;
+ sign = 1.0f;
}
else {
/* y1<=y2<=y0 */
setup.vmin = v1;
setup.vmid = v2;
setup.vmax = v0;
+ sign = -1.0f;
}
}
}
@@ -499,31 +514,16 @@ setup_sort_vertices(const struct vertex_header *v0,
/*
* Compute triangle's area. Use 1/area to compute partial
* derivatives of attributes later.
- *
- * The area will be the same as prim->det, but the sign may be
- * different depending on how the vertices get sorted above.
- *
- * To determine whether the primitive is front or back facing we
- * use the prim->det value because its sign is correct.
*/
- {
- const float area = (setup.emaj.dx * setup.ebot.dy -
- setup.ebot.dx * setup.emaj.dy);
-
- setup.oneOverArea = 1.0f / area;
- /*
- _mesa_printf("%s one-over-area %f area %f det %f\n",
- __FUNCTION__, setup.oneOverArea, area, prim->det );
- */
- }
+ area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
-#if 0
- /* We need to know if this is a front or back-facing triangle for:
- * - the GLSL gl_FrontFacing fragment attribute (bool)
- * - two-sided stencil test
- */
- setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
-#endif
+ setup.oneOverArea = 1.0f / area;
+
+ /* The product of area * sign indicates front/back orientation (0/1) */
+ setup.facing = (area * sign > 0.0f)
+ ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
+
+ setup.vprovoke = v2;
return TRUE;
}
@@ -538,9 +538,9 @@ setup_sort_vertices(const struct vertex_header *v0,
static INLINE void
const_coeff4(uint slot)
{
- setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
- setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
- setup.coef[slot].a0.v = setup.vprovoke->data[slot];
+ setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].a0 = setup.vprovoke->data[slot];
}
@@ -564,13 +564,13 @@ tri_linear_coeff4(uint slot)
vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
spu_mul(majda, spu_splats(setup.ebot.dx)));
- setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneOverArea));
- setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
- vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
- vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
+ vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
+ vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
- setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
+ setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
}
@@ -608,13 +608,13 @@ tri_persp_coeff4(uint slot)
vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
spu_mul(majda, spu_splats(setup.ebot.dx)));
- setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneOverArea));
- setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
- vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
- vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
+ vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
+ vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
- setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
+ setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
}
@@ -750,27 +750,13 @@ subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
}
-static float
-determinant(const float *v0, const float *v1, const float *v2)
-{
- /* edge vectors e = v0 - v2, f = v1 - v2 */
- const float ex = v0[0] - v2[0];
- const float ey = v0[1] - v2[1];
- const float fx = v1[0] - v2[0];
- const float fy = v1[1] - v2[1];
-
- /* det = cross(e,f).z */
- return ex * fy - ey * fx;
-}
-
-
/**
* Draw triangle into tile at (tx, ty) (tile coords)
* The tile data should have already been fetched.
*/
boolean
tri_draw(const float *v0, const float *v1, const float *v2,
- uint tx, uint ty, uint front_winding)
+ uint tx, uint ty)
{
setup.tx = tx;
setup.ty = ty;
@@ -781,12 +767,6 @@ tri_draw(const float *v0, const float *v1, const float *v2,
setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
- /* Before we sort vertices, determine the facing of the triangle,
- * which will be needed for front/back-face stencil application
- */
- float det = determinant(v0, v1, v2);
- setup.facing = (det > 0.0) ^ (front_winding == PIPE_WINDING_CW);
-
if (!setup_sort_vertices((struct vertex_header *) v0,
(struct vertex_header *) v1,
(struct vertex_header *) v2)) {
diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h
index abc3d35160..aa694dd7c9 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.h
+++ b/src/gallium/drivers/cell/spu/spu_tri.h
@@ -31,7 +31,7 @@
extern boolean
-tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding);
+tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty);
#endif /* SPU_TRI_H */