summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c 37
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs.h 4
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.c 26
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.h 2
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos_io.c 127
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_varient.c 10
6 files changed, 144 insertions, 62 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 73fc70c1bc..a0e08dd10a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -79,6 +79,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
const struct vertex_info *vinfo;
unsigned i;
+ unsigned nr_vbs = 0;
if (!draw->render->set_primitive( draw->render,
@@ -102,7 +103,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
fse->key.viewport = !draw->identity_viewport;
fse->key.clip = !draw->bypass_clipping;
- fse->key.pad = 0;
+ fse->key.const_vbuffers = 0;
memset(fse->key.element, 0,
fse->key.nr_elements * sizeof(fse->key.element[0]));
@@ -116,9 +117,16 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
*/
fse->key.element[i].in.buffer = src->vertex_buffer_index;
fse->key.element[i].in.offset = src->src_offset;
+ nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1);
}
+ for (i = 0; i < 5 && i < nr_vbs; i++) {
+ if (draw->pt.vertex_buffer[i].pitch == 0)
+ fse->key.const_vbuffers |= (1<<i);
+ }
+ if (0) debug_printf("%s: lookup const_vbuffers: %x\n", __FUNCTION__, fse->key.const_vbuffers);
+
{
unsigned dst_offset = 0;
@@ -162,13 +170,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
}
}
-
- /* Would normally look up a vertex shader and peruse its list of
- * varients somehow. We omitted that step and put all the
- * hardcoded "shaders" into an array. We're just making the
- * assumption that this happens to be a matching shader... ie
- * you're running isosurf, aren't you?
- */
+
fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader,
&fse->key );
@@ -177,18 +179,17 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
return ;
}
+ if (0) debug_printf("%s: found const_vbuffers: %x\n", __FUNCTION__,
+ fse->active->key.const_vbuffers);
+
/* Now set buffer pointers:
*/
- for (i = 0; i < num_vs_inputs; i++) {
- unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
-
- fse->active->set_input( fse->active,
- i,
-
- ((const ubyte *) draw->pt.user.vbuffer[buf] +
- draw->pt.vertex_buffer[buf].buffer_offset),
-
- draw->pt.vertex_buffer[buf].pitch );
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ fse->active->set_buffer( fse->active,
+ i,
+ ((const ubyte *) draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].pitch );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 45992d1986..68c24abad3 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -64,7 +64,7 @@ struct draw_vs_varient_key {
unsigned nr_outputs:8;
unsigned viewport:1;
unsigned clip:1;
- unsigned pad:5;
+ unsigned const_vbuffers:5;
struct draw_varient_element element[PIPE_MAX_ATTRIBS];
};
@@ -76,7 +76,7 @@ struct draw_vs_varient {
struct draw_vertex_shader *vs;
- void (*set_input)( struct draw_vs_varient *,
+ void (*set_buffer)( struct draw_vs_varient *,
unsigned i,
const void *ptr,
unsigned stride );
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 4c794e0e23..87232865e2 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -196,6 +196,18 @@ static void spill( struct aos_compilation *cp, unsigned idx )
}
+void aos_spill_all( struct aos_compilation *cp )
+{
+ unsigned i;
+
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+}
+
+
static struct x86_reg get_xmm_writable( struct aos_compilation *cp,
struct x86_reg reg )
{
@@ -1941,6 +1953,9 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
aos_init_inputs( &cp, linear );
+ cp.x86_reg[0] = 0;
+ cp.x86_reg[1] = 0;
+
/* Note address for loop jump
*/
label = x86_get_label(cp.func);
@@ -2066,6 +2081,8 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
vaos->buffer[buf].base_ptr = (char *)ptr;
vaos->buffer[buf].stride = stride;
}
+
+ if (0) debug_printf("%s %d/%d: %p %d\n", __FUNCTION__, buf, vaos->nr_vb, ptr, stride);
}
@@ -2078,6 +2095,8 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
struct aos_machine *machine = vaos->draw->vs.aos_machine;
+ if (0) debug_printf("%s %d\n", __FUNCTION__, count);
+
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
machine->constants = vaos->draw->vs.aligned_constants;
machine->immediates = vaos->base.vs->immediates;
@@ -2097,6 +2116,9 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
struct aos_machine *machine = vaos->draw->vs.aos_machine;
+ if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count,
+ vaos->base.key.const_vbuffers);
+
machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
machine->constants = vaos->draw->vs.aligned_constants;
machine->immediates = vaos->base.vs->immediates;
@@ -2140,7 +2162,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
vaos->base.key = *key;
vaos->base.vs = vs;
- vaos->base.set_input = vaos_set_buffer;
+ vaos->base.set_buffer = vaos_set_buffer;
vaos->base.destroy = vaos_destroy;
vaos->base.run_linear = vaos_run_linear;
vaos->base.run_elts = vaos_run_elts;
@@ -2154,7 +2176,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
if (!vaos->buffer)
goto fail;
- debug_printf("nr_vb: %d\n", vaos->nr_vb);
+ debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers);
#if 0
tgsi_dump(vs->state.tokens, 0);
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 306392e5d6..264387517b 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -176,6 +176,8 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp,
unsigned idx,
unsigned dirty );
+void aos_spill_all( struct aos_compilation *cp );
+
struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
unsigned file,
unsigned idx );
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index 8e08b9285f..b0c51d7fa1 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -108,29 +108,45 @@ static void emit_swizzle( struct aos_compilation *cp,
static boolean get_buffer_ptr( struct aos_compilation *cp,
- unsigned buf_idx,
- struct x86_reg elt,
- struct x86_reg ptr)
+ boolean linear,
+ unsigned buf_idx,
+ struct x86_reg elt,
+ struct x86_reg ptr)
{
struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
buf_idx * sizeof(struct aos_buffer));
- struct x86_reg buf_base_ptr = x86_make_disp(buf,
- Offset(struct aos_buffer, base_ptr));
-
struct x86_reg buf_stride = x86_make_disp(buf,
Offset(struct aos_buffer, stride));
+ if (linear) {
+ struct x86_reg buf_ptr = x86_make_disp(buf,
+ Offset(struct aos_buffer, ptr));
- /* Calculate pointer to current attrib:
- */
- x86_mov(cp->func, ptr, buf_stride);
- x86_imul(cp->func, ptr, elt);
- x86_add(cp->func, ptr, buf_base_ptr);
- return TRUE;
-}
+ /* Calculate pointer to current attrib:
+ */
+ x86_mov(cp->func, ptr, buf_ptr);
+ x86_mov(cp->func, elt, buf_stride);
+ x86_add(cp->func, elt, ptr);
+ sse_prefetchnta(cp->func, x86_deref(elt));
+ x86_mov(cp->func, buf_ptr, elt);
+ }
+ else {
+ struct x86_reg buf_base_ptr = x86_make_disp(buf,
+ Offset(struct aos_buffer, base_ptr));
+
+
+ /* Calculate pointer to current attrib:
+ */
+ x86_mov(cp->func, ptr, buf_stride);
+ x86_imul(cp->func, ptr, elt);
+ x86_add(cp->func, ptr, buf_base_ptr);
+ }
+ cp->insn_counter++;
+ return TRUE;
+}
static boolean load_input( struct aos_compilation *cp,
@@ -200,18 +216,57 @@ static boolean load_inputs( struct aos_compilation *cp,
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
{
- if (linear && cp->vaos->nr_vb == 1) {
+ unsigned i;
+ for (i = 0; i < cp->vaos->nr_vb; i++) {
+ struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
+ i * sizeof(struct aos_buffer));
- struct x86_reg elt = cp->idx_EBX;
- struct x86_reg ptr = cp->tmp_EAX;
+ struct x86_reg buf_base_ptr = x86_make_disp(buf,
+ Offset(struct aos_buffer, base_ptr));
- if (!get_buffer_ptr( cp, 0, elt, ptr ))
- return FALSE;
+ if (cp->vaos->base.key.const_vbuffers & (1<<i)) {
+ struct x86_reg ptr = cp->tmp_EAX;
- /* In the linear, single buffer case, keep the buffer pointer
- * instead of the index number.
- */
- x86_mov( cp->func, elt, ptr );
+ x86_mov(cp->func, ptr, buf_base_ptr);
+
+ /* Load all inputs for this constant vertex buffer
+ */
+ load_inputs( cp, i, x86_deref(ptr) );
+
+ /* Then just force them out to aos_machine.input[]
+ */
+ aos_spill_all( cp );
+
+ }
+ else if (linear) {
+
+ struct x86_reg elt = cp->idx_EBX;
+ struct x86_reg ptr = cp->tmp_EAX;
+
+ struct x86_reg buf_stride = x86_make_disp(buf,
+ Offset(struct aos_buffer, stride));
+
+ struct x86_reg buf_ptr = x86_make_disp(buf,
+ Offset(struct aos_buffer, ptr));
+
+
+ /* Calculate pointer to current attrib:
+ */
+ x86_mov(cp->func, ptr, buf_stride);
+ x86_imul(cp->func, ptr, elt);
+ x86_add(cp->func, ptr, buf_base_ptr);
+
+
+ /* In the linear case, keep the buffer pointer instead of the
+ * index number.
+ */
+ if (cp->vaos->nr_vb == 1)
+ x86_mov( cp->func, elt, ptr );
+ else
+ x86_mov( cp->func, buf_ptr, ptr );
+
+ cp->insn_counter++;
+ }
}
return TRUE;
@@ -219,23 +274,22 @@ boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
- if (linear && cp->vaos->nr_vb == 1) {
-
- load_inputs( cp, 0, cp->idx_EBX );
+ unsigned j;
- }
- else {
- struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
- unsigned j;
-
- for (j = 0; j < cp->vaos->nr_vb; j++) {
+ for (j = 0; j < cp->vaos->nr_vb; j++) {
+ if (cp->vaos->base.key.const_vbuffers & (1<<j)) {
+ /* just retrieve pre-transformed input */
+ }
+ else if (linear && cp->vaos->nr_vb == 1) {
+ load_inputs( cp, 0, cp->idx_EBX );
+ }
+ else {
+ struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
struct x86_reg ptr = cp->tmp_EAX;
- if (!get_buffer_ptr( cp, j, elt, ptr ))
+ if (!get_buffer_ptr( cp, linear, j, elt, ptr ))
return FALSE;
- cp->insn_counter++;
-
if (!load_inputs( cp, j, ptr ))
return FALSE;
}
@@ -252,13 +306,16 @@ boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
Offset(struct aos_buffer, stride)));
x86_add(cp->func, cp->idx_EBX, stride);
+ sse_prefetchnta(cp->func, x86_deref(cp->idx_EBX));
}
else if (linear) {
- x86_inc(cp->func, cp->idx_EBX);
+ /* Nothing to do */
}
else {
x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
}
+
+ return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 4daf05dae7..7ee567d478 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -64,10 +64,10 @@ struct draw_vs_varient_generic {
-static void vsvg_set_input( struct draw_vs_varient *varient,
- unsigned buffer,
- const void *ptr,
- unsigned stride )
+static void vsvg_set_buffer( struct draw_vs_varient *varient,
+ unsigned buffer,
+ const void *ptr,
+ unsigned stride )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
@@ -265,7 +265,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
vsvg->base.key = *key;
vsvg->base.vs = vs;
- vsvg->base.set_input = vsvg_set_input;
+ vsvg->base.set_buffer = vsvg_set_buffer;
vsvg->base.run_elts = vsvg_run_elts;
vsvg->base.run_linear = vsvg_run_linear;
vsvg->base.destroy = vsvg_destroy;