summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
authorBrian Paul <brian.paul@tungstengraphics.com>2008-11-05 15:58:09 -0700
committerBrian Paul <brian.paul@tungstengraphics.com>2008-11-05 15:58:09 -0700
commit80a718a63bf2fa817e346f0f5731ee9ef2e0e68b (patch)
tree4a1bdb03ae8b8482b9a15e56c9604fcc34e95b0e /src/gallium
parent7115b79b77e541f3eb81db00f6f0c34a0f224feb (diff)
parentde14fdd63f26a2e6fc55fad92c08966f269601a6 (diff)
Merge commit 'origin/gallium-0.1' into gallium-0.2
Conflicts: src/gallium/auxiliary/rtasm/rtasm_execmem.c src/mesa/shader/slang/slang_emit.c src/mesa/shader/slang/slang_log.c src/mesa/state_tracker/st_atom_framebuffer.c
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c24
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c14
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h25
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c35
-rw-r--r--src/gallium/auxiliary/util/u_mm.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_sse.c3
-rw-r--r--src/gallium/include/pipe/p_debug.h2
10 files changed, 114 insertions, 6 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 87ec6ae20c..b98c0a0ecf 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -33,6 +33,8 @@
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+#include "tgsi/tgsi_dump.h"
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
@@ -195,6 +197,28 @@ draw_arrays(struct draw_context *draw, unsigned prim,
draw->reduced_prim = reduced_prim;
}
+#if 0
+ {
+ int i;
+ debug_printf("draw_arrays(prim=%u start=%u count=%u):\n",
+ prim, start, count);
+ tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
+ debug_printf("Elements:\n");
+ for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+ debug_printf(" format=%s comps=%u\n",
+ pf_name(draw->pt.vertex_element[i].src_format),
+ draw->pt.vertex_element[i].nr_components);
+ }
+ debug_printf("Buffers:\n");
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ debug_printf(" pitch=%u offset=%u ptr=%p\n",
+ draw->pt.vertex_buffer[i].pitch,
+ draw->pt.vertex_buffer[i].buffer_offset,
+ draw->pt.user.vbuffer[i]);
+ }
+ }
+#endif
+
/* drawing done here: */
draw_pt_arrays(draw, prim, start, count);
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index a6880685db..6141ba9cbf 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -2190,7 +2190,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
if (!vaos->buffer)
goto fail;
- debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers);
+ if (0)
+ debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers);
#if 0
tgsi_dump(vs->state.tokens, 0);
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 13d4fcfdbf..80c3606657 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -123,6 +123,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
input = (const float (*)[4])((const char *)input + input_stride);
}
+ tgsi_set_exec_mask(machine,
+ 1,
+ max_vertices > 1,
+ max_vertices > 2,
+ max_vertices > 3);
+
/* run interpreter */
tgsi_exec_machine_run( machine );
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index b11ae31662..0e2036f12a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -99,9 +99,23 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
struct tgsi_exec_machine *machine = shader->machine;
unsigned int i;
+ /* By default, execute all channels. XXX move this inside the loop
+ * below when we support shader conditionals/loops.
+ */
+ tgsi_set_exec_mask(machine, 1, 1, 1, 1);
+
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+ if (max_vertices < 4) {
+ /* disable the unused execution channels */
+ tgsi_set_exec_mask(machine,
+ 1,
+ max_vertices > 1,
+ max_vertices > 2,
+ 0);
+ }
+
/* run compiled shader
*/
shader->func(machine->Inputs,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 1a5294eabc..e32779123f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -958,6 +958,10 @@ fetch_src_file_channel(
switch( file ) {
case TGSI_FILE_CONSTANT:
assert(mach->Consts);
+ assert(index->i[0] >= 0);
+ assert(index->i[1] >= 0);
+ assert(index->i[2] >= 0);
+ assert(index->i[3] >= 0);
chan->f[0] = mach->Consts[index->i[0]][swizzle];
chan->f[1] = mach->Consts[index->i[1]][swizzle];
chan->f[2] = mach->Consts[index->i[2]][swizzle];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index c4e649e69c..fc40a25e09 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -165,6 +165,10 @@ struct tgsi_exec_labels
#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3)
#define TGSI_EXEC_TEMP_HALF_C 1
+/* execution mask, each value is either 0 or ~0 */
+#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3)
+#define TGSI_EXEC_MASK_C 2
+
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5)
@@ -265,6 +269,27 @@ void
tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
+static INLINE void
+tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
+{
+ mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
+ mask;
+}
+
+
+/** Set execution mask values prior to executing the shader */
+static INLINE void
+tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
+ boolean ch0, boolean ch1, boolean ch2, boolean ch3)
+{
+ int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i;
+ mask[0] = ch0 ? ~0 : 0;
+ mask[1] = ch1 ? ~0 : 0;
+ mask[2] = ch2 ? ~0 : 0;
+ mask[3] = ch3 ? ~0 : 0;
+}
+
+
#if defined __cplusplus
} /* extern "C" */
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 47e52c8424..3df0c5db3f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -72,6 +72,9 @@
#define TEMP_R0 TGSI_EXEC_TEMP_R0
#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
+#define TEMP_EXEC_MASK_I TGSI_EXEC_MASK_I
+#define TEMP_EXEC_MASK_C TGSI_EXEC_MASK_C
+
/**
* X86 utility functions.
@@ -233,6 +236,9 @@ emit_const(
int indirectIndex )
{
if (indirect) {
+ /* 'vec' is the offset from the address register's value.
+ * We're loading CONST[ADDR+vec] into an xmm register.
+ */
struct x86_reg r0 = get_input_base();
struct x86_reg r1 = get_output_base();
uint i;
@@ -243,18 +249,40 @@ emit_const(
x86_push( func, r0 );
x86_push( func, r1 );
+ /*
+ * Loop over the four pixels or vertices in the quad.
+ * Get the value of the address (offset) register for pixel/vertex[i],
+ * add it to the src offset and index into the constant buffer.
+ * Note that we're working on SOA data.
+ * If any of the pixel/vertex execution channels are unused their
+ * values will be garbage. It's very important that we don't use
+ * those garbage values as indexes into the constant buffer since
+ * that'll cause segfaults.
+ * The solution is to bitwise-AND the offset with the execution mask
+ * register whose values are either 0 or ~0.
+ * The caller must setup the execution mask register to indicate
+ * which channels are valid/alive before running the shader.
+ * The execution mask will also figure into loops and conditionals
+ * someday.
+ */
for (i = 0; i < QUAD_SIZE; i++) {
- x86_lea( func, r0, get_const( vec, chan ) );
+ /* r1 = address register[i] */
x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
+ /* r0 = execution mask[i] */
+ x86_mov( func, r0, x86_make_disp( get_temp( TEMP_EXEC_MASK_I, TEMP_EXEC_MASK_C ), i * 4 ) );
+ /* r1 = r1 & r0 */
+ x86_and( func, r1, r0 );
+ /* r0 = 'vec', the offset */
+ x86_lea( func, r0, get_const( vec, chan ) );
- /* Quick hack to multiply by 16 -- need to add SHL to rtasm.
+ /* Quick hack to multiply r1 by 16 -- need to add SHL to rtasm.
*/
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
- x86_add( func, r0, r1 );
+ x86_add( func, r0, r1 ); /* r0 = r0 + r1 */
x86_mov( func, r1, x86_deref( r0 ) );
x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
}
@@ -268,6 +296,7 @@ emit_const(
get_temp( TEMP_R0, CHAN_X ) );
}
else {
+ /* 'vec' is the index into the src register file, such as TEMP[vec] */
assert( vec >= 0 );
sse_movss(
diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c
index 592ace00fc..45ce257b5e 100644
--- a/src/gallium/auxiliary/util/u_mm.c
+++ b/src/gallium/auxiliary/util/u_mm.c
@@ -172,6 +172,10 @@ u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
int startofs = 0;
int endofs;
+ assert(size >= 0);
+ assert(align2 >= 0);
+ assert(align2 <= 12); /* sanity check, 2^12 (4KB) enough? */
+
if (!heap || align2 < 0 || size <= 0)
return NULL;
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 0111469405..8aa597f633 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -92,7 +92,8 @@ fs_sse_run( const struct sp_fragment_shader *base,
machine->Temps);
/* init kill mask */
- machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = 0x0;
+ tgsi_set_kill_mask(machine, 0x0);
+ tgsi_set_exec_mask(machine, 1, 1, 1, 1);
shader->func( machine->Inputs,
machine->Outputs,
diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h
index cb6196aa9f..3b00fb9aa8 100644
--- a/src/gallium/include/pipe/p_debug.h
+++ b/src/gallium/include/pipe/p_debug.h
@@ -49,7 +49,7 @@ extern "C" {
#endif
-#ifdef DBG
+#if defined(DBG) || defined(DEBUG)
#ifndef DEBUG
#define DEBUG 1
#endif