summary | refs | log | tree | commit | diff
path: root/src/gallium/auxiliary/draw
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r-- src/gallium/auxiliary/draw/draw_context.c | 8
-rw-r--r-- src/gallium/auxiliary/draw/draw_context.h | 3
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 4
-rw-r--r-- src/gallium/auxiliary/draw/draw_private.h | 5
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt.c | 38
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_emit.c | 4
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 4
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 43
-rw-r--r-- src/gallium/auxiliary/draw/draw_vertex.c | 6
-rw-r--r-- src/gallium/auxiliary/draw/draw_vertex.h | 44
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs.h | 4
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.c | 68
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.h | 21
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 211
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_sse.c | 2
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_varient.c | 10
17 files changed, 341 insertions, 135 deletions
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 78249054f2..b439bc4059 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -274,6 +274,14 @@ draw_enable_point_sprites(struct draw_context *draw, boolean enable)
}
+/**
+ * Set or clear the draw context's force_passthrough flag.
+ *
+ * The context is flushed with DRAW_FLUSH_STATE_CHANGE before the flag is
+ * written (presumably so that already-queued work is processed under the
+ * previous setting -- confirm against draw_do_flush()).
+ *
+ * \param draw    draw context to modify
+ * \param enable  value stored into draw->force_passthrough
+ */
+void
+draw_set_force_passthrough( struct draw_context *draw, boolean enable )
+{
+   /* Flush first, then change state: keep these two statements in order. */
+   draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+   draw->force_passthrough = enable;
+}
+
+
/**
* Ask the draw module for the location/slot of the given vertex attribute in
* a post-transformed vertex.
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 0ab3681b64..3eeb453531 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -160,6 +160,9 @@ void draw_set_render( struct draw_context *draw,
void draw_set_driver_clipping( struct draw_context *draw,
boolean bypass_clipping );
+void draw_set_force_passthrough( struct draw_context *draw,
+ boolean enable );
+
/*******************************************************************************
* Draw pipeline
*/
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index c0cf4269db..9825e116c3 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -231,9 +231,9 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
unsigned emit_sz = 0;
unsigned src_buffer = 0;
unsigned output_format;
- unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) );
+ unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) );
- switch (vbuf->vinfo->emit[i]) {
+ switch (vbuf->vinfo->attrib[i].emit) {
case EMIT_4F:
output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit_sz = 4 * sizeof(float);
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 626a2e3e30..5d531146c5 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -163,12 +163,15 @@ struct draw_context
struct {
boolean bypass_clipping;
+ boolean bypass_vs;
} driver;
boolean flushing; /**< debugging/sanity */
boolean suspend_flushing; /**< internally set */
boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */
+ boolean force_passthrough; /**< never clip or shade */
+
/* pipe state that we need: */
const struct pipe_rasterizer_state *rasterizer;
struct pipe_viewport_state viewport;
@@ -193,7 +196,7 @@ struct draw_context
const float (*aligned_constants)[4];
- float (*aligned_constant_storage)[4];
+ const float (*aligned_constant_storage)[4];
unsigned const_storage_size;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 669c11c993..87ec6ae20c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -69,26 +69,26 @@ draw_pt_arrays(struct draw_context *draw,
return TRUE;
}
-
- if (!draw->render) {
- opt |= PT_PIPELINE;
- }
-
- if (draw_need_pipeline(draw,
- draw->rasterizer,
- prim)) {
- opt |= PT_PIPELINE;
- }
-
- if (!draw->bypass_clipping && !draw->pt.test_fse) {
- opt |= PT_CLIPTEST;
+ if (!draw->force_passthrough) {
+ if (!draw->render) {
+ opt |= PT_PIPELINE;
+ }
+
+ if (draw_need_pipeline(draw,
+ draw->rasterizer,
+ prim)) {
+ opt |= PT_PIPELINE;
+ }
+
+ if (!draw->bypass_clipping && !draw->pt.test_fse) {
+ opt |= PT_CLIPTEST;
+ }
+
+ if (!draw->rasterizer->bypass_vs) {
+ opt |= PT_SHADE;
+ }
}
-
- if (!draw->rasterizer->bypass_vs) {
- opt |= PT_SHADE;
- }
-
-
+
if (opt == 0)
middle = draw->pt.middle.fetch_emit;
else if (opt == PT_SHADE && !draw->pt.no_fse)
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index d4eca80588..d520b05869 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -84,11 +84,11 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
unsigned emit_sz = 0;
unsigned src_buffer = 0;
unsigned output_format;
- unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) );
+ unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
- switch (vinfo->emit[i]) {
+ switch (vinfo->attrib[i].emit) {
case EMIT_4F:
output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit_sz = 4 * sizeof(float);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 5a4db6cfe5..3966ad48ba 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -121,7 +121,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
memset(&key, 0, sizeof(key));
for (i = 0; i < vinfo->num_attribs; i++) {
- const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]];
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->attrib[i].src_index];
unsigned emit_sz = 0;
unsigned input_format = src->src_format;
@@ -129,7 +129,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
unsigned input_offset = src->src_offset;
unsigned output_format;
- switch (vinfo->emit[i]) {
+ switch (vinfo->attrib[i].emit) {
case EMIT_4F:
output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit_sz = 4 * sizeof(float);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 73fc70c1bc..f7e6a1a8ee 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -79,6 +79,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
const struct vertex_info *vinfo;
unsigned i;
+ unsigned nr_vbs = 0;
if (!draw->render->set_primitive( draw->render,
@@ -102,7 +103,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
fse->key.viewport = !draw->identity_viewport;
fse->key.clip = !draw->bypass_clipping;
- fse->key.pad = 0;
+ fse->key.const_vbuffers = 0;
memset(fse->key.element, 0,
fse->key.nr_elements * sizeof(fse->key.element[0]));
@@ -116,16 +117,23 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
*/
fse->key.element[i].in.buffer = src->vertex_buffer_index;
fse->key.element[i].in.offset = src->src_offset;
+ nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1);
}
+ for (i = 0; i < 5 && i < nr_vbs; i++) {
+ if (draw->pt.vertex_buffer[i].pitch == 0)
+ fse->key.const_vbuffers |= (1<<i);
+ }
+ if (0) debug_printf("%s: lookup const_vbuffers: %x\n", __FUNCTION__, fse->key.const_vbuffers);
+
{
unsigned dst_offset = 0;
for (i = 0; i < vinfo->num_attribs; i++) {
unsigned emit_sz = 0;
- switch (vinfo->emit[i]) {
+ switch (vinfo->attrib[i].emit) {
case EMIT_4F:
emit_sz = 4 * sizeof(float);
break;
@@ -153,8 +161,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
* numbers, not to positions in the hw vertex description --
* that's handled by the output_offset field.
*/
- fse->key.element[i].out.format = vinfo->emit[i];
- fse->key.element[i].out.vs_output = vinfo->src_index[i];
+ fse->key.element[i].out.format = vinfo->attrib[i].emit;
+ fse->key.element[i].out.vs_output = vinfo->attrib[i].src_index;
fse->key.element[i].out.offset = dst_offset;
dst_offset += emit_sz;
@@ -162,13 +170,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
}
}
-
- /* Would normally look up a vertex shader and peruse its list of
- * varients somehow. We omitted that step and put all the
- * hardcoded "shaders" into an array. We're just making the
- * assumption that this happens to be a matching shader... ie
- * you're running isosurf, aren't you?
- */
+
fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader,
&fse->key );
@@ -177,18 +179,17 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
return ;
}
+ if (0) debug_printf("%s: found const_vbuffers: %x\n", __FUNCTION__,
+ fse->active->key.const_vbuffers);
+
/* Now set buffer pointers:
*/
- for (i = 0; i < num_vs_inputs; i++) {
- unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
-
- fse->active->set_input( fse->active,
- i,
-
- ((const ubyte *) draw->pt.user.vbuffer[buf] +
- draw->pt.vertex_buffer[buf].buffer_offset),
-
- draw->pt.vertex_buffer[buf].pitch );
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ fse->active->set_buffer( fse->active,
+ i,
+ ((const ubyte *) draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].pitch );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c
index 1446f785c5..3214213e44 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.c
+++ b/src/gallium/auxiliary/draw/draw_vertex.c
@@ -49,7 +49,7 @@ draw_compute_vertex_size(struct vertex_info *vinfo)
vinfo->size = 0;
for (i = 0; i < vinfo->num_attribs; i++) {
- switch (vinfo->emit[i]) {
+ switch (vinfo->attrib[i].emit) {
case EMIT_OMIT:
break;
case EMIT_4UB:
@@ -81,8 +81,8 @@ draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
unsigned i, j;
for (i = 0; i < vinfo->num_attribs; i++) {
- j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
+ j = vinfo->attrib[i].src_index;
+ switch (vinfo->attrib[i].emit) {
case EMIT_OMIT:
debug_printf("EMIT_OMIT:");
break;
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index 16c65c4317..a943607d7e 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -75,12 +75,41 @@ struct vertex_info
{
uint num_attribs;
uint hwfmt[4]; /**< hardware format info for this format */
- enum interp_mode interp_mode[PIPE_MAX_SHADER_INPUTS];
- enum attrib_emit emit[PIPE_MAX_SHADER_INPUTS]; /**< EMIT_x */
- uint src_index[PIPE_MAX_SHADER_INPUTS]; /**< map to post-xform attribs */
uint size; /**< total vertex size in dwords */
+
+ /* Keep this small and at the end of the struct to allow quick
+ * memcmp() comparisons.
+ */
+ struct {
+ ubyte interp_mode:4; /**< INTERP_x */
+ ubyte emit:4; /**< EMIT_x */
+ ubyte src_index; /**< map to post-xform attribs */
+ } attrib[PIPE_MAX_SHADER_INPUTS];
};
+/**
+ * Size in bytes of the in-use part of a vertex_info: from the start of the
+ * struct up to, but not including, attrib[num_attribs].
+ */
+static INLINE int
+draw_vinfo_size( const struct vertex_info *a )
+{
+   const char *first_byte = (const char *)a;
+   const char *past_last_attrib = (const char *)&a->attrib[a->num_attribs];
+
+   return past_last_attrib - first_byte;
+}
+
+/**
+ * memcmp()-style comparison of two vertex_infos.
+ *
+ * Only the in-use prefix of \p a, as measured by draw_vinfo_size(), is
+ * compared; the result is 0 when those bytes are identical in \p b.
+ */
+static INLINE int
+draw_vinfo_compare( const struct vertex_info *a,
+                    const struct vertex_info *b )
+{
+   /* The length comes from a alone; b's num_attribs is not consulted. */
+   const unsigned nbytes = draw_vinfo_size( a );
+
+   return memcmp( a, b, nbytes );
+}
+
+/**
+ * Copy the in-use prefix of \p src (see draw_vinfo_size()) into \p dst.
+ * Bytes of \p dst beyond that prefix are left untouched.
+ */
+static INLINE void
+draw_vinfo_copy( struct vertex_info *dst,
+                 const struct vertex_info *src )
+{
+   const unsigned nbytes = draw_vinfo_size( src );
+
+   memcpy( dst, src, nbytes );
+}
+
/**
@@ -91,14 +120,15 @@ struct vertex_info
*/
static INLINE uint
draw_emit_vertex_attr(struct vertex_info *vinfo,
- enum attrib_emit emit, enum interp_mode interp,
+                      enum attrib_emit emit,
+                      enum interp_mode interp, /* only used by softpipe??? */
uint src_index)
{
const uint n = vinfo->num_attribs;
assert(n < PIPE_MAX_SHADER_INPUTS);
- vinfo->emit[n] = emit;
- vinfo->interp_mode[n] = interp;
- vinfo->src_index[n] = src_index;
+   /* attrib[] stores emit and interp_mode in 4 bits each and src_index in
+    * a single ubyte.  Catch values that would be silently truncated by the
+    * assignments below.
+    */
+   assert((uint) emit < 16);
+   assert((uint) interp < 16);
+   assert(src_index < 256);
+   vinfo->attrib[n].emit = emit;
+   vinfo->attrib[n].interp_mode = interp;
+   vinfo->attrib[n].src_index = src_index;
vinfo->num_attribs++;
return n;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 45992d1986..68c24abad3 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -64,7 +64,7 @@ struct draw_vs_varient_key {
unsigned nr_outputs:8;
unsigned viewport:1;
unsigned clip:1;
- unsigned pad:5;
+ unsigned const_vbuffers:5;
struct draw_varient_element element[PIPE_MAX_ATTRIBS];
};
@@ -76,7 +76,7 @@ struct draw_vs_varient {
struct draw_vertex_shader *vs;
- void (*set_input)( struct draw_vs_varient *,
+ void (*set_buffer)( struct draw_vs_varient *,
unsigned i,
const void *ptr,
unsigned stride );
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index a556477a76..87232865e2 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -92,9 +92,9 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp,
assert(which_reg == 1);
offset = Offset(struct aos_machine, constants);
break;
- case X86_ATTRIBS:
+ case X86_BUFFERS:
assert(which_reg == 0);
- offset = Offset(struct aos_machine, attrib);
+ offset = Offset(struct aos_machine, buffer);
break;
default:
assert(0);
@@ -196,6 +196,18 @@ static void spill( struct aos_compilation *cp, unsigned idx )
}
+/**
+ * Spill and release every one of the eight register slots in cp->xmm[].
+ *
+ * A slot is passed to spill() only when it is marked dirty; every slot,
+ * dirty or not, is then handed to aos_release_xmm_reg().
+ */
+void aos_spill_all( struct aos_compilation *cp )
+{
+   unsigned reg = 0;
+
+   while (reg < 8) {
+      if (cp->xmm[reg].dirty) {
+         spill(cp, reg);
+      }
+
+      aos_release_xmm_reg(cp, reg);
+      reg++;
+   }
+}
+
+
static struct x86_reg get_xmm_writable( struct aos_compilation *cp,
struct x86_reg reg )
{
@@ -1939,6 +1951,11 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
save_fpu_state( &cp );
set_fpu_round_nearest( &cp );
+ aos_init_inputs( &cp, linear );
+
+ cp.x86_reg[0] = 0;
+ cp.x86_reg[1] = 0;
+
/* Note address for loop jump
*/
label = x86_get_label(cp.func);
@@ -2018,13 +2035,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
/* Incr index
*/
- if (linear) {
- x86_inc(cp.func, cp.idx_EBX);
- }
- else {
- x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4));
- }
-
+ aos_incr_inputs( &cp, linear );
}
/* decr count, loop if not zero
*/
@@ -2065,15 +2076,13 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
unsigned stride )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
- unsigned i;
- for (i = 0; i < vaos->base.key.nr_inputs; i++) {
- if (vaos->base.key.element[i].in.buffer == buf) {
- vaos->attrib[i].input_ptr = ((char *)ptr +
- vaos->base.key.element[i].in.offset);
- vaos->attrib[i].input_stride = stride;
- }
+ if (buf < vaos->nr_vb) {
+ vaos->buffer[buf].base_ptr = (char *)ptr;
+ vaos->buffer[buf].stride = stride;
}
+
+ if (0) debug_printf("%s %d/%d: %p %d\n", __FUNCTION__, buf, vaos->nr_vb, ptr, stride);
}
@@ -2086,10 +2095,12 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
struct aos_machine *machine = vaos->draw->vs.aos_machine;
+ if (0) debug_printf("%s %d\n", __FUNCTION__, count);
+
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
machine->constants = vaos->draw->vs.aligned_constants;
machine->immediates = vaos->base.vs->immediates;
- machine->attrib = vaos->attrib;
+ machine->buffer = vaos->buffer;
vaos->gen_run_elts( machine,
elts,
@@ -2105,10 +2116,13 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
struct aos_machine *machine = vaos->draw->vs.aos_machine;
+ if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count,
+ vaos->base.key.const_vbuffers);
+
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
machine->constants = vaos->draw->vs.aligned_constants;
machine->immediates = vaos->base.vs->immediates;
- machine->attrib = vaos->attrib;
+ machine->buffer = vaos->buffer;
vaos->gen_run_linear( machine,
start,
@@ -2127,7 +2141,7 @@ static void vaos_destroy( struct draw_vs_varient *varient )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
- FREE( vaos->attrib );
+ FREE( vaos->buffer );
x86_release_func( &vaos->func[0] );
x86_release_func( &vaos->func[1] );
@@ -2140,6 +2154,7 @@ static void vaos_destroy( struct draw_vs_varient *varient )
static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key )
{
+ unsigned i;
struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse);
if (!vaos)
@@ -2147,17 +2162,22 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
vaos->base.key = *key;
vaos->base.vs = vs;
- vaos->base.set_input = vaos_set_buffer;
+ vaos->base.set_buffer = vaos_set_buffer;
vaos->base.destroy = vaos_destroy;
vaos->base.run_linear = vaos_run_linear;
vaos->base.run_elts = vaos_run_elts;
vaos->draw = vs->draw;
- vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) );
- if (!vaos->attrib)
+ for (i = 0; i < key->nr_inputs; i++)
+ vaos->nr_vb = MAX2( vaos->nr_vb, key->element[i].in.buffer + 1 );
+
+ vaos->buffer = MALLOC( vaos->nr_vb * sizeof(vaos->buffer[0]) );
+ if (!vaos->buffer)
goto fail;
+ debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers);
+
#if 0
tgsi_dump(vs->state.tokens, 0);
#endif
@@ -2179,8 +2199,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
return &vaos->base;
fail:
- if (vaos && vaos->attrib)
- FREE(vaos->attrib);
+ if (vaos && vaos->buffer)
+ FREE(vaos->buffer);
if (vaos)
x86_release_func( &vaos->func[0] );
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 7fe6f79db0..264387517b 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -87,9 +87,10 @@ struct lit_info {
#define MAX_SHINE_TAB 4
#define MAX_LIT_INFO 16
-struct aos_attrib {
- const void *input_ptr;
- unsigned input_stride;
+struct aos_buffer {
+ const void *base_ptr;
+ unsigned stride;
+ void *ptr; /* updated per vertex */
};
@@ -123,7 +124,7 @@ struct aos_machine {
const float (*immediates)[4]; /* points to shader data */
const float (*constants)[4]; /* points to draw data */
- const struct aos_attrib *attrib; /* points to ? */
+ const struct aos_buffer *buffer; /* points to ? */
};
@@ -175,12 +176,15 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp,
unsigned idx,
unsigned dirty );
+void aos_spill_all( struct aos_compilation *cp );
+
struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
unsigned file,
unsigned idx );
-boolean aos_fetch_inputs( struct aos_compilation *cp,
- boolean linear );
+boolean aos_init_inputs( struct aos_compilation *cp, boolean linear );
+boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear );
+boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear );
boolean aos_emit_outputs( struct aos_compilation *cp );
@@ -210,7 +214,7 @@ do { \
#define X86_NULL 0
#define X86_IMMEDIATES 1
#define X86_CONSTANTS 2
-#define X86_ATTRIBS 3
+#define X86_BUFFERS 3
struct x86_reg aos_get_x86( struct aos_compilation *cp,
unsigned which_reg,
@@ -232,7 +236,8 @@ struct draw_vs_varient_aos_sse {
struct draw_vs_varient base;
struct draw_context *draw;
- struct aos_attrib *attrib;
+ struct aos_buffer *buffer;
+ unsigned nr_vb;
vaos_run_linear_func gen_run_linear;
vaos_run_elts_func gen_run_elts;
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index 26297c74f8..dd79bc799a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -54,6 +54,7 @@ static void emit_load_R32G32B32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
+#if 1
sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
/* data = z ? ? ? */
sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
@@ -62,6 +63,16 @@ static void emit_load_R32G32B32( struct aos_compilation *cp,
/* data = ? 0 z 1 */
sse_movlps(cp->func, data, src_ptr);
/* data = x y z 1 */
+#else
+ sse_movups(cp->func, data, src_ptr);
+ /* data = x y z ? */
+ sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) );
+ /* data = ? x y z */
+ sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) );
+ /* data = 1 x y z */
+ sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) );
+ /* data = x y z 1 */
+#endif
}
static void emit_load_R32G32( struct aos_compilation *cp,
@@ -95,28 +106,6 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
-static void get_src_ptr( struct aos_compilation *cp,
- struct x86_reg src,
- struct x86_reg elt,
- unsigned a )
-{
- struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ),
- a * sizeof(struct aos_attrib));
-
- struct x86_reg input_ptr = x86_make_disp(attrib,
- Offset(struct aos_attrib, input_ptr));
-
- struct x86_reg input_stride = x86_make_disp(attrib,
- Offset(struct aos_attrib, input_stride));
-
- /* Calculate pointer to current attrib:
- */
- x86_mov(cp->func, src, input_stride);
- x86_imul(cp->func, src, elt);
- x86_add(cp->func, src, input_ptr);
-}
-
-
/* Extended swizzles? Maybe later.
*/
static void emit_swizzle( struct aos_compilation *cp,
@@ -128,22 +117,60 @@ static void emit_swizzle( struct aos_compilation *cp,
}
+
+/**
+ * Emit x86 code that leaves in \p ptr the address of the current vertex in
+ * vertex buffer \p buf_idx.
+ *
+ * Indexed case (!linear):  ptr = stride * elt + base_ptr.
+ *
+ * Linear case:  ptr = the buffer's saved aos_buffer::ptr; the saved pointer
+ * is then advanced by one stride, using \p elt as a scratch register (its
+ * previous contents are overwritten).  For buffer 0 a prefetch 192 bytes
+ * past the advanced pointer is emitted as well.
+ *
+ * \param cp       compilation state
+ * \param linear   TRUE for the linear run function, FALSE for the indexed one
+ * \param buf_idx  index into the machine's aos_buffer array
+ * \param elt      element index operand (indexed) or scratch register (linear)
+ * \param ptr      register receiving the vertex address
+ * \return always TRUE
+ */
+static boolean get_buffer_ptr( struct aos_compilation *cp,
+                               boolean linear,
+                               unsigned buf_idx,
+                               struct x86_reg elt,
+                               struct x86_reg ptr)
+{
+   struct x86_reg buffers = aos_get_x86( cp, 0, X86_BUFFERS );
+   struct x86_reg this_buf = x86_make_disp(buffers,
+                                           buf_idx * sizeof(struct aos_buffer));
+   struct x86_reg stride_field = x86_make_disp(this_buf,
+                                               Offset(struct aos_buffer, stride));
+
+   if (!linear) {
+      struct x86_reg base_field = x86_make_disp(this_buf,
+                                                Offset(struct aos_buffer, base_ptr));
+
+      /* ptr = stride * elt + base_ptr */
+      x86_mov(cp->func, ptr, stride_field);
+      x86_imul(cp->func, ptr, elt);
+      x86_add(cp->func, ptr, base_field);
+   }
+   else {
+      struct x86_reg cur_field = x86_make_disp(this_buf,
+                                               Offset(struct aos_buffer, ptr));
+
+      /* ptr = saved pointer; saved pointer += stride (elt is scratch) */
+      x86_mov(cp->func, ptr, cur_field);
+      x86_mov(cp->func, elt, stride_field);
+      x86_add(cp->func, elt, ptr);
+      if (buf_idx == 0) {
+         sse_prefetchnta(cp->func, x86_make_disp(elt, 192));
+      }
+      x86_mov(cp->func, cur_field, elt);
+   }
+
+   cp->insn_counter++;
+
+   return TRUE;
+}
+
+
static boolean load_input( struct aos_compilation *cp,
unsigned idx,
- boolean linear )
+ struct x86_reg bufptr )
{
unsigned format = cp->vaos->base.key.element[idx].in.format;
- struct x86_reg src = cp->tmp_EAX;
+ unsigned offset = cp->vaos->base.key.element[idx].in.offset;
struct x86_reg dataXMM = aos_get_xmm_reg(cp);
/* Figure out source pointer address:
*/
- get_src_ptr(cp,
- src,
- linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
- idx);
-
- src = x86_deref(src);
+ struct x86_reg src = x86_make_disp(bufptr, offset);
aos_adopt_xmm_reg( cp,
dataXMM,
@@ -179,20 +206,128 @@ static boolean load_input( struct aos_compilation *cp,
return TRUE;
}
-
-boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
+/**
+ * Emit loads for every shader input whose key element is sourced from
+ * vertex buffer \p buffer, addressing the data relative to \p ptr.
+ *
+ * cp->insn_counter is bumped once per input loaded.
+ *
+ * \return FALSE as soon as load_input() fails for an input, TRUE otherwise
+ */
+static boolean load_inputs( struct aos_compilation *cp,
+                            unsigned buffer,
+                            struct x86_reg ptr )
{
unsigned i;
-
+
for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
- if (!load_input( cp, i, linear ))
- return FALSE;
- cp->insn_counter++;
+      /* Inputs living in other vertex buffers are not ours to load. */
+      if (cp->vaos->base.key.element[i].in.buffer != buffer)
+         continue;
+
+      if (!load_input( cp, i, ptr ))
+         return FALSE;
+
+      cp->insn_counter++;
+   }
+
+   return TRUE;
+}
+
+/**
+ * Emit the one-time input setup that precedes the per-vertex loop.
+ *
+ * For each vertex buffer used by this varient:
+ *  - if its bit is set in key.const_vbuffers, every input sourced from it
+ *    is loaded once from base_ptr and then spilled with aos_spill_all();
+ *  - otherwise, for linear runs, base_ptr + stride * (index in idx_EBX) is
+ *    computed.  With a single vertex buffer that address replaces the index
+ *    in idx_EBX; with several it is stored in the buffer's aos_buffer::ptr;
+ *  - otherwise (indexed runs) nothing is emitted for the buffer.
+ *
+ * \param cp      compilation state
+ * \param linear  TRUE when generating the linear run function
+ * \return FALSE if loading a constant buffer's inputs failed, else TRUE
+ */
+boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
+{
+   unsigned i;
+   for (i = 0; i < cp->vaos->nr_vb; i++) {
+      struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
+                                         i * sizeof(struct aos_buffer));
+
+      struct x86_reg buf_base_ptr = x86_make_disp(buf,
+                                                  Offset(struct aos_buffer, base_ptr));
+
+      if (cp->vaos->base.key.const_vbuffers & (1<<i)) {
+         struct x86_reg ptr = cp->tmp_EAX;
+
+         x86_mov(cp->func, ptr, buf_base_ptr);
+
+         /* Load all inputs for this constant vertex buffer.  Propagate a
+          * failed load instead of reporting success.
+          */
+         if (!load_inputs( cp, i, x86_deref(ptr) ))
+            return FALSE;
+
+         /* Then just force them out to aos_machine.input[]
+          */
+         aos_spill_all( cp );
+
+      }
+      else if (linear) {
+
+         struct x86_reg elt = cp->idx_EBX;
+         struct x86_reg ptr = cp->tmp_EAX;
+
+         struct x86_reg buf_stride = x86_make_disp(buf,
+                                                   Offset(struct aos_buffer, stride));
+
+         struct x86_reg buf_ptr = x86_make_disp(buf,
+                                                Offset(struct aos_buffer, ptr));
+
+
+         /* Calculate pointer to current attrib:
+          */
+         x86_mov(cp->func, ptr, buf_stride);
+         x86_imul(cp->func, ptr, elt);
+         x86_add(cp->func, ptr, buf_base_ptr);
+
+
+         /* In the linear case, keep the buffer pointer instead of the
+          * index number.
+          */
+         if (cp->vaos->nr_vb == 1)
+            x86_mov( cp->func, elt, ptr );
+         else
+            x86_mov( cp->func, buf_ptr, ptr );
+
+         cp->insn_counter++;
+      }
+   }
+
+   return TRUE;
+}
+
+/**
+ * Emit the per-vertex input fetch for every vertex buffer of the varient.
+ *
+ *  - Buffers flagged in key.const_vbuffers emit nothing here; their inputs
+ *    are loaded once by aos_init_inputs().
+ *  - A linear run with exactly one vertex buffer loads straight through
+ *    idx_EBX, which aos_init_inputs() turned into the vertex pointer.
+ *  - Everything else obtains the vertex address in tmp_EAX through
+ *    get_buffer_ptr() and loads relative to that.
+ *
+ * \param cp      compilation state
+ * \param linear  TRUE when generating the linear run function
+ * \return FALSE if any address computation or load failed, TRUE otherwise
+ */
+boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
+{
+   unsigned j;
+
+   for (j = 0; j < cp->vaos->nr_vb; j++) {
+      if (cp->vaos->base.key.const_vbuffers & (1<<j)) {
+         /* just retrieve pre-transformed input */
+      }
+      else if (linear && cp->vaos->nr_vb == 1) {
+         /* Check the result, as the general branch below does. */
+         if (!load_inputs( cp, 0, cp->idx_EBX ))
+            return FALSE;
+      }
+      else {
+         struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
+         struct x86_reg ptr = cp->tmp_EAX;
+
+         if (!get_buffer_ptr( cp, linear, j, elt, ptr ))
+            return FALSE;
+
+         if (!load_inputs( cp, j, ptr ))
+            return FALSE;
+      }
}
return TRUE;
}
+/**
+ * Emit the code that steps the input cursor to the next vertex.
+ *
+ *  - Indexed runs: advance idx_EBX by 4 bytes with an lea.
+ *  - Linear runs with one vertex buffer: add buffer 0's stride to idx_EBX
+ *    and emit a prefetch 192 bytes ahead of it.
+ *  - Linear runs with several vertex buffers: nothing is emitted here.
+ *
+ * \param cp      compilation state
+ * \param linear  TRUE when generating the linear run function
+ * \return always TRUE
+ */
+boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
+{
+   if (!linear) {
+      x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
+      return TRUE;
+   }
+
+   if (cp->vaos->nr_vb == 1) {
+      const unsigned first_buf = 0;
+      struct x86_reg buffers = aos_get_x86( cp, 0, X86_BUFFERS );
+      struct x86_reg stride = x86_make_disp(buffers,
+                                            (first_buf * sizeof(struct aos_buffer) +
+                                             Offset(struct aos_buffer, stride)));
+
+      x86_add(cp->func, cp->idx_EBX, stride);
+      sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192));
+   }
+
+   /* Linear with several buffers: nothing to do. */
+   return TRUE;
+}
@@ -203,7 +338,7 @@ static void emit_store_R32G32B32A32( struct aos_compilation *cp,
struct x86_reg dst_ptr,
struct x86_reg dataXMM )
{
- sse_movups(cp->func, dst_ptr, dataXMM);
+ sse_movaps(cp->func, dst_ptr, dataXMM);
}
static void emit_store_R32G32B32( struct aos_compilation *cp,
@@ -306,7 +441,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp )
if (data.file != file_XMM) {
struct x86_reg tmp = aos_get_xmm_reg( cp );
- sse_movups(cp->func, tmp, data);
+ sse_movaps(cp->func, tmp, data);
data = tmp;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 2ce30b9a02..727977bc3a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -32,6 +32,7 @@
* Brian Paul
*/
+#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "draw_private.h"
#include "draw_context.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 0efabd9de8..b11ae31662 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -37,7 +37,7 @@
#include "draw_vs.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
#include "pipe/p_shader_tokens.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 4daf05dae7..7ee567d478 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -64,10 +64,10 @@ struct draw_vs_varient_generic {
-static void vsvg_set_input( struct draw_vs_varient *varient,
- unsigned buffer,
- const void *ptr,
- unsigned stride )
+static void vsvg_set_buffer( struct draw_vs_varient *varient,
+ unsigned buffer,
+ const void *ptr,
+ unsigned stride )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
@@ -265,7 +265,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
vsvg->base.key = *key;
vsvg->base.vs = vs;
- vsvg->base.set_input = vsvg_set_input;
+ vsvg->base.set_buffer = vsvg_set_buffer;
vsvg->base.run_elts = vsvg_run_elts;
vsvg->base.run_linear = vsvg_run_linear;
vsvg->base.destroy = vsvg_destroy;