summary | refs | log | tree | commit | diff
path: root/src/gallium/auxiliary/draw
diff options
context:
space:
mode:
author: Michal Krol <michal@ubuntu-vbox.(none)> 2008-04-28 18:50:27 +0200
committer: Michal Krol <michal@ubuntu-vbox.(none)> 2008-04-28 18:50:58 +0200
commit: 58d3dff0d3115ddd5397b7f77b5bcf4f9ca616b6 (patch)
tree: 03211346223ed2171d6269d0d6fccbb39bfa2230 /src/gallium/auxiliary/draw
parent: 7f5e9d3f07f6fbfa699bef4ffff85fe0b557584a (diff)
gallium: Generate SSE code to swizzle and unswizzle vs inputs and outputs.
Change SSE_SWIZZLES #define to 0 to disable it.
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c 3
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_sse.c 52
2 files changed, 45 insertions, 10 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index f0763dad8d..4ec20493c4 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -109,9 +109,10 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vertex_shader;
unsigned opt = fpme->opt;
+ unsigned alloc_count = align_int( fetch_count, 4 );
struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count);
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
if (!pipeline_verts) {
/* Not much we can do here - just skip the rendering.
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index b1e9f67114..07f85bc448 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -47,14 +47,29 @@
#include "tgsi/util/tgsi_parse.h"
#define SSE_MAX_VERTICES 4
+#define SSE_SWIZZLES 1
+#if SSE_SWIZZLES
+typedef void (XSTDCALL *codegen_function) (
+ const struct tgsi_exec_vector *input,
+ struct tgsi_exec_vector *output,
+ float (*constant)[4],
+ struct tgsi_exec_vector *temporary,
+ float (*immediates)[4],
+ const float (*aos_input)[4],
+ uint num_inputs,
+ uint input_stride,
+ float (*aos_output)[4],
+ uint num_outputs,
+ uint output_stride );
+#else
typedef void (XSTDCALL *codegen_function) (
const struct tgsi_exec_vector *input,
struct tgsi_exec_vector *output,
float (*constant)[4],
struct tgsi_exec_vector *temporary,
float (*immediates)[4] );
-
+#endif
struct draw_sse_vertex_shader {
struct draw_vertex_shader base;
@@ -91,12 +106,31 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
{
struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
struct tgsi_exec_machine *machine = shader->machine;
- unsigned int i, j;
- unsigned slot;
+ unsigned int i;
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+#if SSE_SWIZZLES
+ /* run compiled shader
+ */
+ shader->func(machine->Inputs,
+ machine->Outputs,
+ (float (*)[4])constants,
+ machine->Temps,
+ shader->immediates,
+ input,
+ base->info.num_inputs,
+ input_stride,
+ output,
+ base->info.num_outputs,
+ output_stride );
+
+ input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
+ output = (float (*)[4])((char *)output + output_stride * max_vertices);
+#else
+ unsigned int j, slot;
+
/* Swizzle inputs.
*/
for (j = 0; j < max_vertices; j++) {
@@ -105,10 +139,10 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
- }
+ }
input = (const float (*)[4])((const char *)input + input_stride);
- }
+ }
/* run compiled shader
*/
@@ -118,7 +152,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
machine->Temps,
shader->immediates);
-
/* Unswizzle all output results.
*/
for (j = 0; j < max_vertices; j++) {
@@ -127,10 +160,11 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
- }
+ }
output = (float (*)[4])((char *)output + output_stride);
- }
+ }
+#endif
}
}
@@ -176,7 +210,7 @@ draw_create_vs_sse(struct draw_context *draw,
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
- &vs->sse2_program, vs->immediates ))
+ &vs->sse2_program, vs->immediates, SSE_SWIZZLES ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );