summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/draw/draw_vs_sse.c
diff options
context:
space:
mode:
author: Keith Whitwell <keith@tungstengraphics.com> 2008-04-17 23:44:32 +0100
committer: Keith Whitwell <keith@tungstengraphics.com> 2008-04-18 10:48:54 +0100
commit: a773f06e969a3992451dd7fe6fd55ea96b2774fa (patch)
tree: 7b8f2ef0bf53da7312c8a89774a5159a87c90e76 /src/gallium/auxiliary/draw/draw_vs_sse.c
parent: 01b6354e72a84f8c3c22be1f77eab8d9c05920a3 (diff)
draw: split off all the extra functionality in the vertex shader
This will at least allow us to make the initial gains needed for decent vertex performance much more quickly and with higher confidence of getting it right. At some later point we can look again at code-generating all the fetch/cliptest/viewport extras in the same block as the vertex shader. For now, we just need to get some decent baseline performance.
Diffstat (limited to 'src/gallium/auxiliary/draw/draw_vs_sse.c')
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c38
1 files changed, 18 insertions, 20 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 873ecfdc5d..a763f3845c 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -91,12 +91,10 @@ vs_sse_run( struct draw_vertex_shader *base,
unsigned vertex_size )
{
struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
- struct tgsi_exec_machine *machine = &draw->machine;
+ struct tgsi_exec_machine *machine = shader->machine;
unsigned int i, j;
unsigned int clipped = 0;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
+ struct tgsi_exec_vector *outputs = 0;
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
@@ -104,13 +102,13 @@ vs_sse_run( struct draw_vertex_shader *base,
/* Consts does not require 16 byte alignment. */
machine->Consts = (const float (*)[4]) draw->user.constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
+
if (draw->rasterizer->bypass_vs) {
/* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
+ outputs = machine->Inputs;
}
else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
+ outputs = machine->Outputs;
}
for (i = 0; i < count; i += SSE_MAX_VERTICES) {
@@ -142,10 +140,10 @@ vs_sse_run( struct draw_vertex_shader *base,
struct vertex_header *out =
draw_header_from_block(vOut, vertex_size, i + j);
- x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j];
+ x = out->clip[0] = outputs[0].xyzw[0].f[j];
+ y = out->clip[1] = outputs[0].xyzw[1].f[j];
+ z = out->clip[2] = outputs[0].xyzw[2].f[j];
+ w = out->clip[3] = outputs[0].xyzw[3].f[j];
if (!draw->rasterizer->bypass_clipping) {
out->clipmask = compute_clipmask(out->clip, draw->plane,
@@ -182,10 +180,10 @@ vs_sse_run( struct draw_vertex_shader *base,
* vertex attrib slots.
*/
for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ out->data[slot][0] = outputs[slot].xyzw[0].f[j];
+ out->data[slot][1] = outputs[slot].xyzw[1].f[j];
+ out->data[slot][2] = outputs[slot].xyzw[2].f[j];
+ out->data[slot][3] = outputs[slot].xyzw[3].f[j];
}
#if 0 /*DEBUG*/
printf("%d) Post xform vert:\n", i + j);
@@ -233,6 +231,8 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
+
+ input = (const float (*)[4])((const char *)input + input_stride);
}
/* run compiled shader
@@ -253,12 +253,9 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
}
- }
- /* Advance input, output pointers:
- */
- input = (const float (*)[4])((const char *)input + input_stride);
- output = (float (*)[4])((char *)output + output_stride);
+ output = (float (*)[4])((char *)output + output_stride);
+ }
}
}
@@ -300,6 +297,7 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.run = vs_sse_run;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
+ vs->machine = &draw->machine;
x86_init_func( &vs->sse2_program );