summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBrian Paul <brian.paul@tungstengraphics.com>2008-11-05 14:02:07 -0700
committerBrian Paul <brian.paul@tungstengraphics.com>2008-11-05 14:02:07 -0700
commitf0debbb0bb951bfc6dc0ae467564b3b1230324cf (patch)
tree14bd7e06fc38f77b61a9d0812e84281634045cbf /src
parent03c0ce4c61fd970509d605fe78166e828fc1df57 (diff)
gallium: call tgsi_set_exec_mask() and use exec mask in SSE ARL code
This prevents vertex shaders from referencing invalid memory locations when the shader is operating on less than four vertices or fragments.
Diffstat (limited to 'src')
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c14
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c35
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_sse.c3
4 files changed, 54 insertions, 4 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 44563803f9..82d27d4493 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -120,6 +120,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
input = (const float (*)[4])((const char *)input + input_stride);
}
+ tgsi_set_exec_mask(machine,
+ 1,
+ max_vertices > 1,
+ max_vertices > 2,
+ max_vertices > 3);
+
/* run interpreter */
tgsi_exec_machine_run( machine );
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 0efabd9de8..77ba5152f9 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -99,9 +99,23 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
struct tgsi_exec_machine *machine = shader->machine;
unsigned int i;
+ /* By default, execute all channels. XXX move this inside the loop
+ * below when we support shader conditionals/loops.
+ */
+ tgsi_set_exec_mask(machine, 1, 1, 1, 1);
+
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+ if (max_vertices < 4) {
+ /* disable the unused execution channels */
+ tgsi_set_exec_mask(machine,
+ 1,
+ max_vertices > 1,
+ max_vertices > 2,
+ 0);
+ }
+
/* run compiled shader
*/
shader->func(machine->Inputs,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index c115956c5d..4d59106dbf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -69,6 +69,9 @@
#define TEMP_R0 TGSI_EXEC_TEMP_R0
#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
+#define TEMP_EXEC_MASK_I TGSI_EXEC_MASK_I
+#define TEMP_EXEC_MASK_C TGSI_EXEC_MASK_C
+
/**
* X86 utility functions.
@@ -230,6 +233,9 @@ emit_const(
int indirectIndex )
{
if (indirect) {
+ /* 'vec' is the offset from the address register's value.
+ * We're loading CONST[ADDR+vec] into an xmm register.
+ */
struct x86_reg r0 = get_input_base();
struct x86_reg r1 = get_output_base();
uint i;
@@ -240,18 +246,40 @@ emit_const(
x86_push( func, r0 );
x86_push( func, r1 );
+ /*
+ * Loop over the four pixels or vertices in the quad.
+ * Get the value of the address (offset) register for pixel/vertex[i],
+ * add it to the src offset and index into the constant buffer.
+ * Note that we're working on SOA data.
+ * If any of the pixel/vertex execution channels are unused their
+ * values will be garbage. It's very important that we don't use
+ * those garbage values as indexes into the constant buffer since
+ * that'll cause segfaults.
+ * The solution is to bitwise-AND the offset with the execution mask
+ * register whose values are either 0 or ~0.
+ * The caller must setup the execution mask register to indicate
+ * which channels are valid/alive before running the shader.
+ * The execution mask will also figure into loops and conditionals
+ * someday.
+ */
for (i = 0; i < QUAD_SIZE; i++) {
- x86_lea( func, r0, get_const( vec, chan ) );
+ /* r1 = address register[i] */
x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
+ /* r0 = execution mask[i] */
+ x86_mov( func, r0, x86_make_disp( get_temp( TEMP_EXEC_MASK_I, TEMP_EXEC_MASK_C ), i * 4 ) );
+ /* r1 = r1 & r0 */
+ x86_and( func, r1, r0 );
+ /* r0 = 'vec', the offset */
+ x86_lea( func, r0, get_const( vec, chan ) );
- /* Quick hack to multiply by 16 -- need to add SHL to rtasm.
+ /* Quick hack to multiply r1 by 16 -- need to add SHL to rtasm.
*/
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
- x86_add( func, r0, r1 );
+ x86_add( func, r0, r1 ); /* r0 = r0 + r1 */
x86_mov( func, r1, x86_deref( r0 ) );
x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
}
@@ -265,6 +293,7 @@ emit_const(
get_temp( TEMP_R0, CHAN_X ) );
}
else {
+ /* 'vec' is the index into the src register file, such as TEMP[vec] */
assert( vec >= 0 );
sse_movss(
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 496ed43df2..50eb2c07bc 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -92,7 +92,8 @@ fs_sse_run( const struct sp_fragment_shader *base,
machine->Temps);
/* init kill mask */
- machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = 0x0;
+ tgsi_set_kill_mask(machine, 0x0);
+ tgsi_set_exec_mask(machine, 1, 1, 1, 1);
shader->func( machine->Inputs,
machine->Outputs,