summary | refs | log | tree | commit | diff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
author Ben Skeggs <skeggsb@gmail.com> 2008-06-23 00:01:17 +1000
committer Ben Skeggs <skeggsb@gmail.com> 2008-06-23 00:01:17 +1000
commit aa3ab377e6e2e5811cdd704d87c3e24acb5eff72 (patch)
tree ab8a443a58a58a6b88f35d4b5730ed1292e44d26 /src/gallium/auxiliary
parent 8c26a521ee80f5d8a1d0aabd0910233aad400322 (diff)
parent e2c3f06e9649b5b87fc9adbca7d1f07841bba895 (diff)
Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
Conflicts: configs/default
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- src/gallium/auxiliary/draw/Makefile 6
-rw-r--r-- src/gallium/auxiliary/draw/SConscript 8
-rw-r--r-- src/gallium/auxiliary/draw/draw_context.c 47
-rw-r--r-- src/gallium/auxiliary/draw/draw_context.h 13
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe.c 66
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe.h 2
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_aaline.c 62
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_aapoint.c 43
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_clip.c 24
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_cull.c 8
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_flatshade.c 32
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_offset.c 7
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_pstipple.c 27
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_stipple.c 27
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_twoside.c 2
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_vbuf.c 13
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_wide_line.c 9
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_wide_point.c 13
-rw-r--r-- src/gallium/auxiliary/draw/draw_private.h 58
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt.c 59
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt.h 42
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_decompose.h 153
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_elts.c 8
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_emit.c 101
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch.c 36
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c 125
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c 406
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c 193
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_post_vs.c 38
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_util.c 103
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_varray.c 196
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h 194
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h 91
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_vcache.c 246
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h 11
-rw-r--r-- src/gallium/auxiliary/draw/draw_vertex.h 21
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs.c 181
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs.h 133
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.c 2138
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.h 248
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos_io.c 325
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos_machine.c 323
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_exec.c 4
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_llvm.c 2
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_sse.c 77
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_varient.c 321
-rw-r--r-- src/gallium/auxiliary/gallivm/tgsitollvm.cpp 10
-rw-r--r-- src/gallium/auxiliary/pipebuffer/Makefile 1
-rw-r--r-- src/gallium/auxiliary/pipebuffer/SConscript 1
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c 35
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_bufmgr.h 14
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c 101
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c 7
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c 1
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c 4
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_cpu.c 4
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_x86sse.c 190
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_x86sse.h 36
-rw-r--r-- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c 18
-rwxr-xr-x src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c 48
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_build.c 134
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_build.h 19
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_dump.c 95
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_dump.h 18
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_parse.c 17
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_parse.h 9
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_scan.c 87
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_scan.h 4
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_util.c 2
-rw-r--r-- src/gallium/auxiliary/translate/translate.c 3
-rw-r--r-- src/gallium/auxiliary/translate/translate.h 18
-rw-r--r-- src/gallium/auxiliary/translate/translate_generic.c 252
-rw-r--r-- src/gallium/auxiliary/translate/translate_sse.c 39
-rw-r--r-- src/gallium/auxiliary/util/Makefile 3
-rw-r--r-- src/gallium/auxiliary/util/p_debug.c 50
-rw-r--r-- src/gallium/auxiliary/util/p_debug_mem.c 53
-rw-r--r-- src/gallium/auxiliary/util/u_blit.c 2
-rw-r--r-- src/gallium/auxiliary/util/u_pack_color.h 12
-rw-r--r-- src/gallium/auxiliary/util/u_simple_shaders.c 33
-rw-r--r-- src/gallium/auxiliary/util/u_time.c 74
-rw-r--r-- src/gallium/auxiliary/util/u_time.h 2
81 files changed, 6677 insertions, 961 deletions
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index da7eded21f..f2e36a89e9 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -26,12 +26,18 @@ C_SOURCES = \
draw_pt_emit.c \
draw_pt_fetch.c \
draw_pt_fetch_emit.c \
+ draw_pt_fetch_shade_emit.c \
draw_pt_fetch_shade_pipeline.c \
draw_pt_post_vs.c \
+ draw_pt_util.c \
draw_pt_varray.c \
draw_pt_vcache.c \
draw_vertex.c \
draw_vs.c \
+ draw_vs_varient.c \
+ draw_vs_aos.c \
+ draw_vs_aos_io.c \
+ draw_vs_aos_machine.c \
draw_vs_exec.c \
draw_vs_llvm.c \
draw_vs_sse.c
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 3b5d5ed492..544a04918b 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -15,7 +15,7 @@ draw = env.ConvenienceLibrary(
'draw_pipe_stipple.c',
'draw_pipe_twoside.c',
'draw_pipe_unfilled.c',
- 'draw_pipe_util.c',
+ 'draw_pipe_util.c',
'draw_pipe_validate.c',
'draw_pipe_vbuf.c',
'draw_pipe_wide_line.c',
@@ -25,15 +25,21 @@ draw = env.ConvenienceLibrary(
'draw_pt_emit.c',
'draw_pt_fetch.c',
'draw_pt_fetch_emit.c',
+ 'draw_pt_fetch_shade_emit.c',
'draw_pt_fetch_shade_pipeline.c',
'draw_pt_post_vs.c',
+ 'draw_pt_util.c',
'draw_pt_varray.c',
'draw_pt_vcache.c',
'draw_vertex.c',
'draw_vs.c',
+ 'draw_vs_aos.c',
+ 'draw_vs_aos_io.c',
+ 'draw_vs_aos_machine.c',
'draw_vs_exec.c',
'draw_vs_llvm.c',
'draw_vs_sse.c',
+ 'draw_vs_varient.c'
])
auxiliaries.insert(0, draw)
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 98e23fa830..2f263cf06a 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -56,12 +56,6 @@ struct draw_context *draw_create( void )
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
- tgsi_exec_machine_init(&draw->machine);
-
- /* FIXME: give this machine thing a proper constructor:
- */
- draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
- draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
if (!draw_pipeline_init( draw ))
goto fail;
@@ -69,6 +63,9 @@ struct draw_context *draw_create( void )
if (!draw_pt_init( draw ))
goto fail;
+ if (!draw_vs_init( draw ))
+ goto fail;
+
return draw;
fail:
@@ -83,13 +80,6 @@ void draw_destroy( struct draw_context *draw )
return;
- if (draw->machine.Inputs)
- align_free(draw->machine.Inputs);
-
- if (draw->machine.Outputs)
- align_free(draw->machine.Outputs);
-
- tgsi_exec_machine_free_data(&draw->machine);
/* Not so fast -- we're just borrowing this at the moment.
*
@@ -99,6 +89,7 @@ void draw_destroy( struct draw_context *draw )
draw_pipeline_destroy( draw );
draw_pt_destroy( draw );
+ draw_vs_destroy( draw );
FREE( draw );
}
@@ -183,6 +174,8 @@ void draw_set_viewport_state( struct draw_context *draw,
viewport->translate[1] == 0.0f &&
viewport->translate[2] == 0.0f &&
viewport->translate[3] == 0.0f);
+
+ draw_vs_set_viewport( draw, viewport );
}
@@ -224,9 +217,11 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw,
void
draw_set_mapped_constant_buffer(struct draw_context *draw,
- const void *buffer)
+ const void *buffer,
+ unsigned size )
{
draw->pt.user.constants = buffer;
+ draw_vs_set_constants( draw, (const float (*)[4])buffer, size );
}
@@ -295,7 +290,7 @@ int
draw_find_vs_output(struct draw_context *draw,
uint semantic_name, uint semantic_index)
{
- const struct draw_vertex_shader *vs = draw->vertex_shader;
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == semantic_name &&
@@ -320,7 +315,7 @@ draw_find_vs_output(struct draw_context *draw,
uint
draw_num_vs_outputs(struct draw_context *draw)
{
- uint count = draw->vertex_shader->info.num_outputs;
+ uint count = draw->vs.vertex_shader->info.num_outputs;
if (draw->extra_vp_outputs.slot > 0)
count++;
return count;
@@ -354,14 +349,30 @@ void draw_set_edgeflags( struct draw_context *draw,
* \param elements the element buffer ptr
*/
void
-draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize, void *elements )
+draw_set_mapped_element_buffer_range( struct draw_context *draw,
+ unsigned eltSize,
+ unsigned min_index,
+ unsigned max_index,
+ void *elements )
{
draw->pt.user.elts = elements;
draw->pt.user.eltSize = eltSize;
+ draw->pt.user.min_index = min_index;
+ draw->pt.user.max_index = max_index;
}
+void
+draw_set_mapped_element_buffer( struct draw_context *draw,
+ unsigned eltSize,
+ void *elements )
+{
+ draw->pt.user.elts = elements;
+ draw->pt.user.eltSize = eltSize;
+ draw->pt.user.min_index = 0;
+ draw->pt.user.max_index = 0xffffffff;
+}
+
/* Revamp me please:
*/
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index c5c3d3b09e..b8f2bfa332 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -118,14 +118,23 @@ void draw_set_vertex_elements(struct draw_context *draw,
unsigned count,
const struct pipe_vertex_element *elements);
+void
+draw_set_mapped_element_buffer_range( struct draw_context *draw,
+ unsigned eltSize,
+ unsigned min_index,
+ unsigned max_index,
+ void *elements );
+
void draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize, void *elements );
+ unsigned eltSize,
+ void *elements );
void draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer);
void draw_set_mapped_constant_buffer(struct draw_context *draw,
- const void *buffer);
+ const void *buffer,
+ unsigned size );
void draw_set_edgeflags( struct draw_context *draw,
const unsigned *edgeflag );
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 46afb0f41f..3355c871ee 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -212,6 +212,72 @@ void draw_pipeline_run( struct draw_context *draw,
draw->pipeline.vertex_count = 0;
}
+#define QUAD(i0,i1,i2,i3) \
+ do_triangle( draw, \
+ ( DRAW_PIPE_RESET_STIPPLE | \
+ DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_2 ), \
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1), \
+ verts + stride * (i3)); \
+ do_triangle( draw, \
+ ( DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_1 ), \
+ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i2), \
+ verts + stride * (i3))
+
+#define TRIANGLE(flags,i0,i1,i2) \
+ do_triangle( draw, \
+ flags, /* flags */ \
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1), \
+ verts + stride * (i2))
+
+#define LINE(flags,i0,i1) \
+ do_line( draw, \
+ flags, \
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i+1))
+
+#define POINT(i0) \
+ do_point( draw, \
+ verts + stride * i0 )
+
+#define FUNC pipe_run_linear
+#define ARGS \
+ struct draw_context *draw, \
+ unsigned prim, \
+ struct vertex_header *vertices, \
+ unsigned stride
+
+#define LOCAL_VARS \
+ char *verts = (char *)vertices; \
+ boolean flatfirst = (draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first); \
+ unsigned i; \
+ ushort flags
+
+#define FLUSH
+
+#include "draw_pt_decompose.h"
+
+void draw_pipeline_run_linear( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ char *verts = (char *)vertices;
+ draw->pipeline.verts = verts;
+ draw->pipeline.vertex_stride = stride;
+ draw->pipeline.vertex_count = count;
+
+ pipe_run_linear(draw, prim, vertices, stride, count);
+
+ draw->pipeline.verts = NULL;
+ draw->pipeline.vertex_count = 0;
+}
void draw_pipeline_flush( struct draw_context *draw,
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h
index f1cb0891ca..dbad8f98ac 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.h
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -116,7 +116,7 @@ dup_vert( struct draw_stage *stage,
{
struct vertex_header *tmp = stage->tmp[idx];
const uint vsize = sizeof(struct vertex_header)
- + stage->draw->num_vs_outputs * 4 * sizeof(float);
+ + stage->draw->vs.num_vs_outputs * 4 * sizeof(float);
memcpy(tmp, vert, vsize);
tmp->vertex_id = UNDEFINED_VERTEX_ID;
return tmp;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index b1ed8aa24e..ecdebca5f1 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -78,6 +78,8 @@ struct aaline_stage
/** For AA lines, this is the vertex attrib slot for the new texcoords */
uint tex_slot;
+ /** position, not necessarily output zero */
+ uint pos_slot;
void *sampler_cso;
struct pipe_texture *texture;
@@ -141,18 +143,18 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
decl->Semantic.SemanticIndex == 0) {
- aactx->colorOutput = decl->u.DeclarationRange.First;
+ aactx->colorOutput = decl->DeclarationRange.First;
}
else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
uint i;
- for (i = decl->u.DeclarationRange.First;
- i <= decl->u.DeclarationRange.Last; i++) {
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
aactx->samplersUsed |= 1 << i;
}
}
else if (decl->Declaration.File == TGSI_FILE_INPUT) {
- if ((int) decl->u.DeclarationRange.Last > aactx->maxInput)
- aactx->maxInput = decl->u.DeclarationRange.Last;
+ if ((int) decl->DeclarationRange.Last > aactx->maxInput)
+ aactx->maxInput = decl->DeclarationRange.Last;
if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
(int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
aactx->maxGeneric = decl->Semantic.SemanticIndex;
@@ -160,8 +162,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
}
else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
uint i;
- for (i = decl->u.DeclarationRange.First;
- i <= decl->u.DeclarationRange.Last; i++) {
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
aactx->tempsUsed |= (1 << i);
}
}
@@ -225,34 +227,33 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
/* declare new generic input/texcoord */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_INPUT;
+ /* XXX this could be linear... */
+ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
decl.Declaration.Semantic = 1;
decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
- decl.Declaration.Interpolate = 1;
- /* XXX this could be linear... */
- decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = aactx->maxInput + 1;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->maxInput + 1;
ctx->emit_declaration(ctx, &decl);
/* declare new sampler */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_SAMPLER;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = aactx->freeSampler;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->freeSampler;
ctx->emit_declaration(ctx, &decl);
/* declare new temp regs */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_TEMPORARY;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = aactx->texTemp;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->texTemp;
ctx->emit_declaration(ctx, &decl);
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_TEMPORARY;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = aactx->colorTemp;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->colorTemp;
ctx->emit_declaration(ctx, &decl);
aactx->firstInstruction = FALSE;
@@ -521,9 +522,10 @@ aaline_line(struct draw_stage *stage, struct prim_header *header)
struct prim_header tri;
struct vertex_header *v[8];
uint texPos = aaline->tex_slot;
+ uint posPos = aaline->pos_slot;
float *pos, *tex;
- float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0];
- float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1];
+ float dx = header->v[1]->data[posPos][0] - header->v[0]->data[posPos][0];
+ float dy = header->v[1]->data[posPos][1] - header->v[0]->data[posPos][1];
double a = atan2(dy, dx);
float c_a = (float) cos(a), s_a = (float) sin(a);
uint i;
@@ -550,35 +552,35 @@ aaline_line(struct draw_stage *stage, struct prim_header *header)
*/
/* new verts */
- pos = v[0]->data[0];
+ pos = v[0]->data[posPos];
pos[0] += (-dx * c_a - dy * s_a);
pos[1] += (-dx * s_a + dy * c_a);
- pos = v[1]->data[0];
+ pos = v[1]->data[posPos];
pos[0] += (-dx * c_a - -dy * s_a);
pos[1] += (-dx * s_a + -dy * c_a);
- pos = v[2]->data[0];
+ pos = v[2]->data[posPos];
pos[0] += ( dx * c_a - dy * s_a);
pos[1] += ( dx * s_a + dy * c_a);
- pos = v[3]->data[0];
+ pos = v[3]->data[posPos];
pos[0] += ( dx * c_a - -dy * s_a);
pos[1] += ( dx * s_a + -dy * c_a);
- pos = v[4]->data[0];
+ pos = v[4]->data[posPos];
pos[0] += (-dx * c_a - dy * s_a);
pos[1] += (-dx * s_a + dy * c_a);
- pos = v[5]->data[0];
+ pos = v[5]->data[posPos];
pos[0] += (-dx * c_a - -dy * s_a);
pos[1] += (-dx * s_a + -dy * c_a);
- pos = v[6]->data[0];
+ pos = v[6]->data[posPos];
pos[0] += ( dx * c_a - dy * s_a);
pos[1] += ( dx * s_a + dy * c_a);
- pos = v[7]->data[0];
+ pos = v[7]->data[posPos];
pos[0] += ( dx * c_a - -dy * s_a);
pos[1] += ( dx * s_a + -dy * c_a);
@@ -653,8 +655,8 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
}
/* update vertex attrib info */
- aaline->tex_slot = draw->num_vs_outputs;
- assert(aaline->tex_slot > 0); /* output[0] is vertex pos */
+ aaline->tex_slot = draw->vs.num_vs_outputs;
+ aaline->pos_slot = draw->vs.position_output;
/* advertise the extra post-transformed vertex attribute */
draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 122a48660a..87fd303649 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -85,6 +85,7 @@ struct aapoint_stage
/** this is the vertex attrib slot for the new texcoords */
uint tex_slot;
+ uint pos_slot;
/*
* Currently bound state
@@ -131,11 +132,11 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
decl->Semantic.SemanticIndex == 0) {
- aactx->colorOutput = decl->u.DeclarationRange.First;
+ aactx->colorOutput = decl->DeclarationRange.First;
}
else if (decl->Declaration.File == TGSI_FILE_INPUT) {
- if ((int) decl->u.DeclarationRange.Last > aactx->maxInput)
- aactx->maxInput = decl->u.DeclarationRange.Last;
+ if ((int) decl->DeclarationRange.Last > aactx->maxInput)
+ aactx->maxInput = decl->DeclarationRange.Last;
if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
(int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
aactx->maxGeneric = decl->Semantic.SemanticIndex;
@@ -143,8 +144,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
}
else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
uint i;
- for (i = decl->u.DeclarationRange.First;
- i <= decl->u.DeclarationRange.Last; i++) {
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
aactx->tempsUsed |= (1 << i);
}
}
@@ -193,27 +194,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
/* declare new generic input/texcoord */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_INPUT;
+ /* XXX this could be linear... */
+ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
decl.Declaration.Semantic = 1;
decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
- decl.Declaration.Interpolate = 1;
- /* XXX this could be linear... */
- decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = texInput;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = texInput;
ctx->emit_declaration(ctx, &decl);
/* declare new temp regs */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_TEMPORARY;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = tmp0;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = tmp0;
ctx->emit_declaration(ctx, &decl);
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_TEMPORARY;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = aactx->colorTemp;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = aactx->colorTemp;
ctx->emit_declaration(ctx, &decl);
aactx->firstInstruction = FALSE;
@@ -571,6 +571,7 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header)
struct prim_header tri;
struct vertex_header *v[4];
uint texPos = aapoint->tex_slot;
+ uint pos_slot = aapoint->pos_slot;
float radius, *pos, *tex;
uint i;
float k;
@@ -620,19 +621,19 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header)
}
/* new verts */
- pos = v[0]->data[0];
+ pos = v[0]->data[pos_slot];
pos[0] -= radius;
pos[1] -= radius;
- pos = v[1]->data[0];
+ pos = v[1]->data[pos_slot];
pos[0] += radius;
pos[1] -= radius;
- pos = v[2]->data[0];
+ pos = v[2]->data[pos_slot];
pos[0] += radius;
pos[1] += radius;
- pos = v[3]->data[0];
+ pos = v[3]->data[pos_slot];
pos[0] -= radius;
pos[1] += radius;
@@ -681,9 +682,11 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
bind_aapoint_fragment_shader(aapoint);
/* update vertex attrib info */
- aapoint->tex_slot = draw->num_vs_outputs;
+ aapoint->tex_slot = draw->vs.num_vs_outputs;
assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
+ aapoint->pos_slot = draw->vs.position_output;
+
draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib;
draw->extra_vp_outputs.slot = aapoint->tex_slot;
@@ -692,7 +695,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
aapoint->psize_slot = -1;
if (draw->rasterizer->point_size_per_vertex) {
/* find PSIZ vertex output */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index ce80c94163..fa10f8efca 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -112,7 +112,8 @@ static void interp( const struct clipper *clip,
const struct vertex_header *out,
const struct vertex_header *in )
{
- const unsigned nr_attrs = clip->stage.draw->num_vs_outputs;
+ const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs;
+ const unsigned pos_attr = clip->stage.draw->vs.position_output;
unsigned j;
/* Vertex header.
@@ -138,18 +139,17 @@ static void interp( const struct clipper *clip,
const float *trans = clip->stage.draw->viewport.translate;
const float oow = 1.0f / pos[3];
- dst->data[0][0] = pos[0] * oow * scale[0] + trans[0];
- dst->data[0][1] = pos[1] * oow * scale[1] + trans[1];
- dst->data[0][2] = pos[2] * oow * scale[2] + trans[2];
- dst->data[0][3] = oow;
+ dst->data[pos_attr][0] = pos[0] * oow * scale[0] + trans[0];
+ dst->data[pos_attr][1] = pos[1] * oow * scale[1] + trans[1];
+ dst->data[pos_attr][2] = pos[2] * oow * scale[2] + trans[2];
+ dst->data[pos_attr][3] = oow;
}
/* Other attributes
- * Note: start at 1 to skip winpos (data[0]) since we just computed
- * it above.
*/
- for (j = 1; j < nr_attrs; j++) {
- interp_attr(dst->data[j], t, in->data[j], out->data[j]);
+ for (j = 0; j < nr_attrs; j++) {
+ if (j != pos_attr)
+ interp_attr(dst->data[j], t, in->data[j], out->data[j]);
}
}
@@ -171,7 +171,7 @@ static void emit_poly( struct draw_stage *stage,
header.flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
header.pad = 0;
- for (i = 2; i < n; i++, header.flags = 0) {
+ for (i = 2; i < n; i++, header.flags = edge_middle) {
header.v[0] = inlist[i-1];
header.v[1] = inlist[i];
header.v[2] = inlist[0]; /* keep in v[2] for flatshading */
@@ -180,7 +180,7 @@ static void emit_poly( struct draw_stage *stage,
header.flags |= edge_last;
if (0) {
- const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint j, k;
debug_printf("Clipped tri:\n");
for (j = 0; j < 3; j++) {
@@ -425,7 +425,7 @@ clip_init_state( struct draw_stage *stage )
clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE;
if (clipper->flat) {
- const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint i;
clipper->num_color_attribs = 0;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index 87aaf1f85b..d0d22a38e0 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -55,10 +55,12 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
static void cull_tri( struct draw_stage *stage,
struct prim_header *header )
{
+ const unsigned pos = stage->draw->vs.position_output;
+
/* Window coords: */
- const float *v0 = header->v[0]->data[0];
- const float *v1 = header->v[1]->data[0];
- const float *v2 = header->v[2]->data[0];
+ const float *v0 = header->v[0]->data[pos];
+ const float *v1 = header->v[1]->data[pos];
+ const float *v2 = header->v[2]->data[pos];
/* edge vectors e = v0 - v2, f = v1 - v2 */
const float ex = v0[0] - v2[0];
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index 09b68c4559..4741b22d02 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -40,9 +40,19 @@ struct flat_stage
struct draw_stage stage;
uint num_color_attribs;
- uint color_attribs[4]; /* front/back primary/secondary colors */
+ uint color_attribs[2]; /* front/back primary colors */
+
+ uint num_spec_attribs;
+ uint spec_attribs[2]; /* front/back secondary colors */
};
+#define COPY_3FV( DST, SRC ) \
+do { \
+ (DST)[0] = (SRC)[0]; \
+ (DST)[1] = (SRC)[1]; \
+ (DST)[2] = (SRC)[2]; \
+} while (0)
+
static INLINE struct flat_stage *
flat_stage(struct draw_stage *stage)
@@ -58,10 +68,16 @@ static INLINE void copy_colors( struct draw_stage *stage,
{
const struct flat_stage *flat = flat_stage(stage);
uint i;
+
for (i = 0; i < flat->num_color_attribs; i++) {
const uint attr = flat->color_attribs[i];
COPY_4FV(dst->data[attr], src->data[attr]);
}
+
+ for (i = 0; i < flat->num_spec_attribs; i++) {
+ const uint attr = flat->spec_attribs[i];
+ COPY_3FV(dst->data[attr], src->data[attr]);
+ }
}
@@ -78,6 +94,12 @@ static INLINE void copy_colors2( struct draw_stage *stage,
COPY_4FV(dst0->data[attr], src->data[attr]);
COPY_4FV(dst1->data[attr], src->data[attr]);
}
+
+ for (i = 0; i < flat->num_spec_attribs; i++) {
+ const uint attr = flat->spec_attribs[i];
+ COPY_3FV(dst0->data[attr], src->data[attr]);
+ COPY_3FV(dst1->data[attr], src->data[attr]);
+ }
}
@@ -159,15 +181,19 @@ static void flatshade_line_1( struct draw_stage *stage,
static void flatshade_init_state( struct draw_stage *stage )
{
struct flat_stage *flat = flat_stage(stage);
- const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint i;
/* Find which vertex shader outputs are colors, make a list */
flat->num_color_attribs = 0;
+ flat->num_spec_attribs = 0;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
- flat->color_attribs[flat->num_color_attribs++] = i;
+ if (vs->info.output_semantic_index[i] == 0)
+ flat->color_attribs[flat->num_color_attribs++] = i;
+ else
+ flat->spec_attribs[flat->num_spec_attribs++] = i;
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index ea6de8c571..8f1650e55c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -62,14 +62,15 @@ static INLINE struct offset_stage *offset_stage( struct draw_stage *stage )
static void do_offset_tri( struct draw_stage *stage,
struct prim_header *header )
{
+ const unsigned pos = stage->draw->vs.position_output;
struct offset_stage *offset = offset_stage(stage);
float inv_det = 1.0f / header->det;
/* Window coords:
*/
- float *v0 = header->v[0]->data[0];
- float *v1 = header->v[1]->data[0];
- float *v2 = header->v[2]->data[0];
+ float *v0 = header->v[0]->data[pos];
+ float *v1 = header->v[1]->data[pos];
+ float *v2 = header->v[2]->data[pos];
/* edge vectors e = v0 - v2, f = v1 - v2 */
float ex = v0[0] - v2[0];
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 4c92416eb1..4087cf7a49 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -132,20 +132,20 @@ pstip_transform_decl(struct tgsi_transform_context *ctx,
if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
uint i;
- for (i = decl->u.DeclarationRange.First;
- i <= decl->u.DeclarationRange.Last; i++) {
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
pctx->samplersUsed |= 1 << i;
}
}
else if (decl->Declaration.File == TGSI_FILE_INPUT) {
- pctx->maxInput = MAX2(pctx->maxInput, (int) decl->u.DeclarationRange.Last);
+ pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last);
if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION)
- pctx->wincoordInput = (int) decl->u.DeclarationRange.First;
+ pctx->wincoordInput = (int) decl->DeclarationRange.First;
}
else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
uint i;
- for (i = decl->u.DeclarationRange.First;
- i <= decl->u.DeclarationRange.Last; i++) {
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last; i++) {
pctx->tempsUsed |= (1 << i);
}
}
@@ -223,28 +223,27 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
/* declare new position input reg */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */
decl.Declaration.Semantic = 1;
decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION;
decl.Semantic.SemanticIndex = 0;
- decl.Declaration.Interpolate = 1;
- decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = wincoordInput;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = wincoordInput;
ctx->emit_declaration(ctx, &decl);
}
/* declare new sampler */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_SAMPLER;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = pctx->freeSampler;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = pctx->freeSampler;
ctx->emit_declaration(ctx, &decl);
/* declare new temp regs */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_TEMPORARY;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = pctx->texTemp;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = pctx->texTemp;
ctx->emit_declaration(ctx, &decl);
/* emit immediate = {1/32, 1/32, 1, 1}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 3cbced362e..bf0db18a68 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -71,7 +71,7 @@ screen_interp( struct draw_context *draw,
const struct vertex_header *v1 )
{
uint attr;
- for (attr = 0; attr < draw->num_vs_outputs; attr++) {
+ for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) {
const float *val0 = v0->data[attr];
const float *val1 = v1->data[attr];
float *newv = dst->data[attr];
@@ -119,8 +119,9 @@ stipple_line(struct draw_stage *stage, struct prim_header *header)
struct stipple_stage *stipple = stipple_stage(stage);
struct vertex_header *v0 = header->v[0];
struct vertex_header *v1 = header->v[1];
- const float *pos0 = v0->data[0];
- const float *pos1 = v1->data[0];
+ const unsigned pos = stage->draw->vs.position_output;
+ const float *pos0 = v0->data[pos];
+ const float *pos1 = v1->data[pos];
float start = 0;
int state = 0;
@@ -175,6 +176,22 @@ reset_stipple_counter(struct draw_stage *stage)
stage->next->reset_stipple_counter( stage->next );
}
+/**
+ * Point handler for the stipple stage: zero this stage's stipple
+ * counter, then forward the point to the next stage untouched.
+ */
+static void
+stipple_reset_point(struct draw_stage *stage, struct prim_header *header)
+{
+   stipple_stage(stage)->counter = 0;
+   stage->next->point( stage->next, header );
+}
+
+/**
+ * Triangle handler for the stipple stage: zero this stage's stipple
+ * counter, then forward the triangle to the next stage untouched.
+ */
+static void
+stipple_reset_tri(struct draw_stage *stage, struct prim_header *header)
+{
+   stipple_stage(stage)->counter = 0;
+   stage->next->tri( stage->next, header );
+}
+
static void
stipple_first_line(struct draw_stage *stage,
@@ -220,9 +237,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw )
stipple->stage.draw = draw;
stipple->stage.next = NULL;
- stipple->stage.point = draw_pipe_passthrough_point;
+ stipple->stage.point = stipple_reset_point;
stipple->stage.line = stipple_first_line;
- stipple->stage.tri = draw_pipe_passthrough_tri;
+ stipple->stage.tri = stipple_reset_tri;
stipple->stage.reset_stipple_counter = reset_stipple_counter;
stipple->stage.flush = stipple_flush;
stipple->stage.destroy = stipple_destroy;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 50872fdbe9..3ac825f565 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -105,7 +105,7 @@ static void twoside_first_tri( struct draw_stage *stage,
struct prim_header *header )
{
struct twoside_stage *twoside = twoside_stage(stage);
- const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint i;
twoside->attrib_front0 = 0;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 67b9a9503d..a6fde77a0e 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -136,6 +136,8 @@ emit_vertex( struct vbuf_stage *vbuf,
* set_buffer is efficient. Consider a special one-shot mode for
* translate.
*/
+ /* Note: we really do want data[0] here, not data[pos]:
+ */
vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr);
@@ -145,7 +147,7 @@ emit_vertex( struct vbuf_stage *vbuf,
vertex->vertex_id = vbuf->nr_vertices++;
}
- return vertex->vertex_id;
+ return (ushort)vertex->vertex_id;
}
@@ -387,6 +389,15 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* Allocate a new vertex buffer */
vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
+
+ /* even number */
+ vbuf->max_vertices = vbuf->max_vertices & ~1;
+
+ /* Must always succeed -- driver gives us a
+ * 'max_vertex_buffer_bytes' which it guarantees it can allocate,
+ * and it will flush itself if necessary to do so. If this does
+ * fail, we are basically without usable hardware.
+ */
vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render,
(ushort) vbuf->vertex_size,
(ushort) vbuf->max_vertices);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index 878c9c7169..29649f5787 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -58,6 +58,7 @@ static void wideline_line( struct draw_stage *stage,
struct prim_header *header )
{
/*const struct wideline_stage *wide = wideline_stage(stage);*/
+ const unsigned pos = stage->draw->vs.position_output;
const float half_width = 0.5f * stage->draw->rasterizer->line_width;
struct prim_header tri;
@@ -67,10 +68,10 @@ static void wideline_line( struct draw_stage *stage,
struct vertex_header *v2 = dup_vert(stage, header->v[1], 2);
struct vertex_header *v3 = dup_vert(stage, header->v[1], 3);
- float *pos0 = v0->data[0];
- float *pos1 = v1->data[0];
- float *pos2 = v2->data[0];
- float *pos3 = v3->data[0];
+ float *pos0 = v0->data[pos];
+ float *pos1 = v1->data[pos];
+ float *pos2 = v2->data[pos];
+ float *pos3 = v3->data[pos];
const float dx = FABSF(pos0[0] - pos2[0]);
const float dy = FABSF(pos0[1] - pos2[1]);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index ed08573382..d40a07f4ae 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -96,6 +96,7 @@ static void widepoint_point( struct draw_stage *stage,
struct prim_header *header )
{
const struct widepoint_stage *wide = widepoint_stage(stage);
+ const unsigned pos = stage->draw->vs.position_output;
const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite;
float half_size;
float left_adj, right_adj, bot_adj, top_adj;
@@ -108,10 +109,10 @@ static void widepoint_point( struct draw_stage *stage,
struct vertex_header *v2 = dup_vert(stage, header->v[0], 2);
struct vertex_header *v3 = dup_vert(stage, header->v[0], 3);
- float *pos0 = v0->data[0];
- float *pos1 = v1->data[0];
- float *pos2 = v2->data[0];
- float *pos3 = v3->data[0];
+ float *pos0 = v0->data[pos];
+ float *pos1 = v1->data[pos];
+ float *pos2 = v2->data[pos];
+ float *pos3 = v3->data[pos];
/* point size is either per-vertex or fixed size */
if (wide->psize_slot >= 0) {
@@ -197,7 +198,7 @@ static void widepoint_first_point( struct draw_stage *stage,
if (draw->rasterizer->point_sprite) {
/* find vertex shader texcoord outputs */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i, j = 0;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
@@ -212,7 +213,7 @@ static void widepoint_first_point( struct draw_stage *stage,
wide->psize_slot = -1;
if (draw->rasterizer->point_size_per_vertex) {
/* find PSIZ vertex output */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index cee58bbf73..7bd1e670b4 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -124,6 +124,7 @@ struct draw_context
struct {
struct {
struct draw_pt_middle_end *fetch_emit;
+ struct draw_pt_middle_end *fetch_shade_emit;
struct draw_pt_middle_end *general;
} middle;
@@ -146,6 +147,8 @@ struct draw_context
const void *elts;
/** bytes per index (0, 1, 2 or 4) */
unsigned eltSize;
+ unsigned min_index;
+ unsigned max_index;
/** vertex arrays */
const void *vbuffer[PIPE_MAX_ATTRIBS];
@@ -154,6 +157,8 @@ struct draw_context
const void *constants;
} user;
+ boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */
+ boolean no_fse; /* disable FSE even when it is correct */
} pt;
struct {
@@ -167,13 +172,36 @@ struct draw_context
/* pipe state that we need: */
const struct pipe_rasterizer_state *rasterizer;
struct pipe_viewport_state viewport;
+ boolean identity_viewport;
+
+ struct {
+ struct draw_vertex_shader *vertex_shader;
+ uint num_vs_outputs; /**< convenience, from vertex_shader */
+ uint position_output;
- struct draw_vertex_shader *vertex_shader;
+ /** TGSI program interpreter runtime state */
+ struct tgsi_exec_machine machine;
+
+ /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
+ */
+ struct gallivm_cpu_engine *engine;
+
+ /* Here's another one:
+ */
+ struct aos_machine *aos_machine;
- boolean identity_viewport;
- uint num_vs_outputs; /**< convenience, from vertex_shader */
+ const float (*aligned_constants)[4];
+ float (*aligned_constant_storage)[4];
+ unsigned const_storage_size;
+
+
+ struct translate *fetch;
+ struct translate_cache *fetch_cache;
+ struct translate *emit;
+ struct translate_cache *emit_cache;
+ } vs;
/* Clip derived state:
*/
@@ -190,17 +218,22 @@ struct draw_context
unsigned reduced_prim;
- /** TGSI program interpreter runtime state */
- struct tgsi_exec_machine machine;
-
- /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
- */
- struct gallivm_cpu_engine *engine;
void *driver_private;
};
+/*******************************************************************************
+ * Vertex shader code:
+ */
+boolean draw_vs_init( struct draw_context *draw );
+void draw_vs_destroy( struct draw_context *draw );
+void draw_vs_set_viewport( struct draw_context *,
+ const struct pipe_viewport_state * );
+
+void draw_vs_set_constants( struct draw_context *,
+ const float (*constants)[4],
+ unsigned size );
@@ -232,6 +265,7 @@ void draw_pipeline_destroy( struct draw_context *draw );
* These flags expected at first vertex of lines & triangles when
* unfilled and/or line stipple modes are operational.
*/
+#define DRAW_PIPE_MAX_VERTICES (0x1<<12)
#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12)
#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12)
#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12)
@@ -247,6 +281,12 @@ void draw_pipeline_run( struct draw_context *draw,
const ushort *elts,
unsigned count );
+void draw_pipeline_run_linear( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride );
+
void draw_pipeline_flush( struct draw_context *draw,
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index c9c5d18313..9140faeea9 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -35,6 +35,10 @@
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
+/**
+ * Trim a vertex count down to the largest value that still forms a
+ * whole number of primitives.
+ *
+ * \param count  number of vertices supplied by the caller
+ * \param first  number of vertices needed for the first primitive
+ * \param incr   number of extra vertices needed for each further
+ *               primitive (must be non-zero)
+ * \return the trimmed count, or 0 when there are not enough vertices
+ *         for even one primitive
+ */
+static unsigned trim( unsigned count, unsigned first, unsigned incr )
+{
+   /* Without this check the unsigned subtraction below wraps around
+    * when count < first, and the result can end up larger than
+    * 'first' (e.g. count=1, first=3, incr=3 gives 0xffffffff), which
+    * defeats the caller's "count < first" early-out.
+    */
+   if (count < first)
+      return 0;
+
+   return count - (count - first) % incr;
+}
@@ -54,6 +58,17 @@ draw_pt_arrays(struct draw_context *draw,
struct draw_pt_middle_end *middle = NULL;
unsigned opt = 0;
+ /* Sanitize primitive length:
+ */
+ {
+ unsigned first, incr;
+ draw_pt_split_prim(prim, &first, &incr);
+ count = trim(count, first, incr);
+ if (count < first)
+ return TRUE;
+ }
+
+
if (!draw->render) {
opt |= PT_PIPELINE;
}
@@ -64,7 +79,7 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_PIPELINE;
}
- if (!draw->bypass_clipping) {
+ if (!draw->bypass_clipping && !draw->pt.test_fse) {
opt |= PT_CLIPTEST;
}
@@ -72,16 +87,18 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_SHADE;
}
- if (opt)
- middle = draw->pt.middle.general;
- else
+
+ if (opt == 0)
middle = draw->pt.middle.fetch_emit;
+ else if (opt == PT_SHADE && !draw->pt.no_fse)
+ middle = draw->pt.middle.fetch_shade_emit;
+ else
+ middle = draw->pt.middle.general;
/* Pick the right frontend
*/
- if (draw->pt.user.elts ||
- count >= 256) {
+ if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
frontend = draw->pt.front.vcache;
} else {
frontend = draw->pt.front.varray;
@@ -102,6 +119,9 @@ draw_pt_arrays(struct draw_context *draw,
boolean draw_pt_init( struct draw_context *draw )
{
+ draw->pt.test_fse = debug_get_bool_option("DRAW_FSE", FALSE);
+ draw->pt.no_fse = debug_get_bool_option("DRAW_NO_FSE", FALSE);
+
draw->pt.front.vcache = draw_pt_vcache( draw );
if (!draw->pt.front.vcache)
return FALSE;
@@ -114,6 +134,10 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_emit)
return FALSE;
+ draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
+ if (!draw->pt.middle.fetch_shade_emit)
+ return FALSE;
+
draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
if (!draw->pt.middle.general)
return FALSE;
@@ -134,6 +158,11 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_emit = NULL;
}
+ if (draw->pt.middle.fetch_shade_emit) {
+ draw->pt.middle.fetch_shade_emit->destroy( draw->pt.middle.fetch_shade_emit );
+ draw->pt.middle.fetch_shade_emit = NULL;
+ }
+
if (draw->pt.front.vcache) {
draw->pt.front.vcache->destroy( draw->pt.front.vcache );
draw->pt.front.vcache = NULL;
@@ -147,19 +176,6 @@ void draw_pt_destroy( struct draw_context *draw )
-static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
- PIPE_PRIM_POINTS,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES
-};
-
/**
* Draw vertex arrays
@@ -172,9 +188,10 @@ void
draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count)
{
- if (reduced_prim[prim] != draw->reduced_prim) {
+ unsigned reduced_prim = draw_pt_reduced_prim(prim);
+ if (reduced_prim != draw->reduced_prim) {
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->reduced_prim = reduced_prim[prim];
+ draw->reduced_prim = reduced_prim;
}
/* drawing done here: */
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 2dec376cee..3d2a9c78b7 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -84,7 +84,8 @@ struct draw_pt_front_end {
struct draw_pt_middle_end {
void (*prepare)( struct draw_pt_middle_end *,
unsigned prim,
- unsigned opt );
+ unsigned opt,
+ unsigned *max_vertices );
void (*run)( struct draw_pt_middle_end *,
const unsigned *fetch_elts,
@@ -92,6 +93,21 @@ struct draw_pt_middle_end {
const ushort *draw_elts,
unsigned draw_count );
+ void (*run_linear)(struct draw_pt_middle_end *,
+ unsigned start,
+ unsigned count);
+
+ /* Transform all vertices in a linear range and then draw them with
+ * the supplied element list.
+ */
+ void (*run_linear_elts)( struct draw_pt_middle_end *,
+ unsigned fetch_start,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count );
+
+ int (*get_max_vertex_count)( struct draw_pt_middle_end * );
+
void (*finish)( struct draw_pt_middle_end * );
void (*destroy)( struct draw_pt_middle_end * );
};
@@ -117,6 +133,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw,
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+
/* Middle-ends:
*
* Currently one general-purpose case which can do all possibilities,
@@ -128,6 +145,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
* vertex_elements.
*/
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
+struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
@@ -143,7 +161,8 @@ boolean draw_pt_get_edgeflag( struct draw_context *draw,
struct pt_emit;
void draw_pt_emit_prepare( struct pt_emit *emit,
- unsigned prim );
+ unsigned prim,
+ unsigned *max_vertices );
void draw_pt_emit( struct pt_emit *emit,
const float (*vertex_data)[4],
@@ -152,6 +171,13 @@ void draw_pt_emit( struct pt_emit *emit,
const ushort *elts,
unsigned count );
+void draw_pt_emit_linear( struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned vertex_count,
+ unsigned stride,
+ unsigned start,
+ unsigned count );
+
void draw_pt_emit_destroy( struct pt_emit *emit );
struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
@@ -170,6 +196,11 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
unsigned count,
char *verts );
+void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
+ unsigned start,
+ unsigned count,
+ char *verts );
+
void draw_pt_fetch_destroy( struct pt_fetch *fetch );
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
@@ -194,4 +225,11 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
+/*******************************************************************************
+ * Utils:
+ */
+void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
+unsigned draw_pt_reduced_prim(unsigned prim);
+
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h
new file mode 100644
index 0000000000..3fb0695687
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h
@@ -0,0 +1,153 @@
+
+
+/* Template body: decomposes 'count' vertices of primitive type 'prim'
+ * into individual points, lines, triangles and quads.
+ *
+ * The including file is expected to define FUNC, ARGS, LOCAL_VARS,
+ * POINT, LINE, TRIANGLE, QUAD and FLUSH before including this header.
+ * 'prim', 'i', 'flags' and 'flatfirst' are used below but not declared
+ * here -- presumably they come from ARGS / LOCAL_VARS (confirm against
+ * the including file).
+ *
+ * The arguments handed to POINT/LINE/TRIANGLE/QUAD are positions within
+ * the run (0 .. count-1), plus a flags word for LINE and TRIANGLE.
+ */
+static void FUNC( ARGS,
+ unsigned count )
+{
+ LOCAL_VARS;
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i ++) {
+ POINT( (i + 0) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ /* Independent segments: every line gets the reset-stipple flag. */
+ for (i = 0; i+1 < count; i += 2) {
+ LINE( DRAW_PIPE_RESET_STIPPLE,
+ (i + 0),
+ (i + 1));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ if (count >= 2) {
+ /* Only the first segment carries the reset-stipple flag. */
+ flags = DRAW_PIPE_RESET_STIPPLE;
+
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( flags,
+ (i - 1),
+ (i ));
+ }
+
+ /* i == count here: close the loop from the last vertex back to
+ * vertex 0.
+ */
+ LINE( flags,
+ (i - 1),
+ (0 ));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ /* Only the first segment carries the reset-stipple flag. */
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( flags,
+ (i - 1),
+ (i ));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 0; i+2 < count; i += 3) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 0),
+ (i + 1),
+ (i + 2 ));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ /* (i&1) swaps two of the indices on every odd triangle. With
+ * flatfirst the first index emitted is always i; otherwise the
+ * last index emitted is always i+2.
+ */
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 0),
+ (i + 1 + (i&1)),
+ (i + 2 - (i&1)));
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 0 + (i&1)),
+ (i + 1 - (i&1)),
+ (i + 2 ));
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ /* Vertex 0 is shared by every triangle; flatfirst only changes
+ * whether it is emitted last or first.
+ */
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 1),
+ (i + 2),
+ (0 ));
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (0),
+ (i + 1),
+ (i + 2 ));
+ }
+ }
+ }
+ break;
+
+
+ case PIPE_PRIM_QUADS:
+ for (i = 0; i+3 < count; i += 4) {
+ QUAD( (i + 0),
+ (i + 1),
+ (i + 2),
+ (i + 3));
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ /* Advances two vertices per quad; note the emitted order is
+ * (i+2, i+0, i+1, i+3).
+ */
+ for (i = 0; i+3 < count; i += 2) {
+ QUAD( (i + 2),
+ (i + 0),
+ (i + 1),
+ (i + 3));
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ /* The first triangle gets reset-stipple plus the first and middle
+ * edge flags; later triangles get only the middle edge flag.
+ */
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+
+ for (i = 0; i+2 < count; i++, flags = edge_middle) {
+
+ /* Final triangle of the polygon: add the last edge flag. */
+ if (i + 3 == count)
+ flags |= edge_last;
+
+ TRIANGLE( flags,
+ (i + 1),
+ (i + 2),
+ (0));
+ }
+ }
+ break;
+
+ default:
+ /* Unknown primitive type: nothing is emitted. */
+ assert(0);
+ break;
+ }
+
+ FLUSH;
+}
+
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
index 2094c081ed..b7780fb507 100644
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ b/src/gallium/auxiliary/draw/draw_pt_elts.c
@@ -60,10 +60,10 @@ static unsigned elt_vert( const void *elts, unsigned idx )
pt_elt_func draw_pt_elt_func( struct draw_context *draw )
{
switch (draw->pt.user.eltSize) {
- case 0: return elt_vert;
- case 1: return elt_ubyte;
- case 2: return elt_ushort;
- case 4: return elt_uint;
+ case 0: return &elt_vert;
+ case 1: return &elt_ubyte;
+ case 2: return &elt_ushort;
+ case 4: return &elt_uint;
default: return NULL;
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index ce3a153f64..40f05cb9e0 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -40,10 +40,14 @@ struct pt_emit {
struct translate *translate;
struct translate_cache *cache;
+ unsigned prim;
+
+ const struct vertex_info *vinfo;
};
void draw_pt_emit_prepare( struct pt_emit *emit,
- unsigned prim )
+ unsigned prim,
+ unsigned *max_vertices )
{
struct draw_context *draw = emit->draw;
const struct vertex_info *vinfo;
@@ -51,8 +55,18 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
struct translate_key hw_key;
unsigned i;
boolean ok;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+
+ /* XXX: may need to defensively reset this later on as clipping can
+ * clobber this state in the render backend.
+ */
+ emit->prim = prim;
- ok = draw->render->set_primitive(draw->render, prim);
+ ok = draw->render->set_primitive(draw->render, emit->prim);
if (!ok) {
assert(0);
return;
@@ -60,7 +74,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
/* Must do this after set_primitive() above:
*/
- vinfo = draw->render->get_vertex_info(draw->render);
+ emit->vinfo = vinfo = draw->render->get_vertex_info(draw->render);
/* Translate from pipeline vertices to hw vertices.
@@ -100,6 +114,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
case EMIT_4UB:
output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
emit_sz = 4 * sizeof(ubyte);
+ break;
default:
assert(0);
output_format = PIPE_FORMAT_NONE;
@@ -125,6 +140,12 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
translate_key_sanitize(&hw_key);
emit->translate = translate_cache_find(emit->cache, &hw_key);
}
+
+ *max_vertices = (draw->render->max_vertex_buffer_bytes /
+ (vinfo->size * 4));
+
+ /* even number */
+ *max_vertices = *max_vertices & ~1;
}
@@ -144,6 +165,14 @@ void draw_pt_emit( struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ /* XXX: and work out some way to coordinate the render primitive
+ * between vbuf.c and here...
+ */
+ if (!draw->render->set_primitive(draw->render, emit->prim)) {
+ assert(0);
+ return;
+ }
+
hw_verts = render->allocate_vertices(render,
(ushort)translate->key.output_stride,
(ushort)vertex_count);
@@ -178,6 +207,72 @@ void draw_pt_emit( struct pt_emit *emit,
}
+/**
+ * Translate a linear run of pipeline vertices into hardware vertices
+ * and draw them with the render backend's draw_arrays().
+ *
+ * \param emit          emit state; supplies the translate object, the
+ *                      primitive set by draw_pt_emit_prepare() and the
+ *                      vertex info
+ * \param vertex_data   source pipeline vertices
+ * \param vertex_count  number of source vertices to translate
+ * \param stride        stride handed to the translate object for
+ *                      vertex_data (presumably in bytes -- confirm
+ *                      against the translate API)
+ * \param start         first hardware vertex passed to draw_arrays()
+ * \param count         number of vertices passed to draw_arrays()
+ */
+void draw_pt_emit_linear(struct pt_emit *emit,
+                         const float (*vertex_data)[4],
+                         unsigned vertex_count,
+                         unsigned stride,
+                         unsigned start,
+                         unsigned count)
+{
+   struct draw_context *draw = emit->draw;
+   struct translate *translate = emit->translate;
+   struct vbuf_render *render = draw->render;
+   void *hw_verts;
+
+#if 0
+   debug_printf("Linear emit\n");
+#endif
+   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   /* XXX: and work out some way to coordinate the render primitive
+    * between vbuf.c and here...
+    */
+   if (!render->set_primitive(render, emit->prim)) {
+      assert(0);
+      return;
+   }
+
+   /* The buffer must hold every vertex that translate->run() writes
+    * below, and release_vertices() is told vertex_count as well, so
+    * size the allocation by vertex_count rather than by the draw count.
+    */
+   hw_verts = render->allocate_vertices(render,
+                                        (ushort)translate->key.output_stride,
+                                        (ushort)vertex_count);
+   if (!hw_verts) {
+      assert(0);
+      return;
+   }
+
+   /* Buffer 0: the pipeline vertices. */
+   translate->set_buffer(translate, 0,
+                         vertex_data, stride);
+
+   /* Buffer 1: the rasterizer's point size, with stride 0 so that the
+    * same value is read for every vertex.
+    */
+   translate->set_buffer(translate, 1,
+                         &draw->rasterizer->point_size,
+                         0);
+
+   translate->run(translate,
+                  0,
+                  vertex_count,
+                  hw_verts);
+
+   /* Debug aid, disabled by default. */
+   if (0) {
+      unsigned i;
+      for (i = 0; i < vertex_count; i++) {
+         debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
+         draw_dump_emitted_vertex( emit->vinfo,
+                                   (const uint8_t *)hw_verts +
+                                   translate->key.output_stride * i );
+      }
+   }
+
+
+   render->draw_arrays(render, start, count);
+
+   render->release_vertices(render,
+                            hw_verts,
+                            translate->key.output_stride,
+                            vertex_count);
+}
+
struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
{
struct pt_emit *emit = CALLOC_STRUCT(pt_emit);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index b96335b789..07f4c99164 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -166,6 +166,42 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
}
+/**
+ * Fetch 'count' consecutive vertices, beginning at index 'start', from
+ * the user vertex buffers into 'verts' using the fetch translate object.
+ * When edgeflags are needed they are filled in afterwards, one vertex
+ * header at a time.
+ */
+void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
+                               unsigned start,
+                               unsigned count,
+                               char *verts )
+{
+   struct draw_context *draw = fetch->draw;
+   struct translate *translate = fetch->translate;
+   unsigned buf;
+   unsigned v;
+
+   /* Bind each user vertex buffer (base pointer plus buffer offset)
+    * together with its pitch.
+    */
+   for (buf = 0; buf < draw->pt.nr_vertex_buffers; buf++) {
+      translate->set_buffer(translate,
+                            buf,
+                            ((char *)draw->pt.user.vbuffer[buf] +
+                             draw->pt.vertex_buffer[buf].buffer_offset),
+                            draw->pt.vertex_buffer[buf].pitch );
+   }
+
+   translate->run( translate, start, count, verts );
+
+   /* Edgeflags are hard to express in a translate program, so they are
+    * populated separately, and only when requested.  Per the original
+    * note they default to one during setup, so nothing needs doing
+    * otherwise.
+    */
+   if (!fetch->need_edgeflags)
+      return;
+
+   for (v = 0; v < count; v++) {
+      struct vertex_header *vh = (struct vertex_header *)(verts + v * fetch->vertex_size);
+      vh->edgeflag = draw_pt_get_edgeflag( draw, start + v );
+   }
+}
+
+
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw )
{
struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 4ea7d4359f..4a1f3b0953 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -90,7 +90,8 @@ struct fetch_emit_middle_end {
static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
unsigned prim,
- unsigned opt )
+ unsigned opt,
+ unsigned *max_vertices )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -196,6 +197,18 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
draw->pt.vertex_buffer[i].buffer_offset),
draw->pt.vertex_buffer[i].pitch );
}
+
+ *max_vertices = (draw->render->max_vertex_buffer_bytes /
+ (vinfo->size * 4));
+
+ /* Return an even number of verts.
+ * This prevents "parity" errors when splitting long triangle strips which
+ * can lead to front/back culling mix-ups.
+ * Every other triangle in a strip has an alternate front/back orientation
+ * so splitting at an odd position can cause the orientation of subsequent
+ * triangles to get reversed.
+ */
+ *max_vertices = *max_vertices & ~1;
}
@@ -258,6 +271,106 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
}
+/**
+ * Linear (non-indexed) path of the fetch-emit middle end: fetch 'count'
+ * vertices starting at 'start', translate them straight into a freshly
+ * allocated hardware vertex buffer, draw them with draw_arrays() and
+ * release the buffer again.
+ */
+static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
+                                   unsigned start,
+                                   unsigned count )
+{
+   struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+   struct draw_context *draw = feme->draw;
+   struct vbuf_render *render;
+   void *hw_verts;
+
+   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   render = draw->render;
+
+   hw_verts = render->allocate_vertices( render,
+                                         (ushort)feme->translate->key.output_stride,
+                                         (ushort)count );
+   if (!hw_verts) {
+      assert(0);
+      return;
+   }
+
+   /* One translate run both fetches the source vertices and writes the
+    * hardware vertices.
+    */
+   feme->translate->run( feme->translate, start, count, hw_verts );
+
+   /* Debug aid, disabled by default. */
+   if (0) {
+      unsigned i;
+      for (i = 0; i < count; i++) {
+         debug_printf("\n\nvertex %d:\n", i);
+         draw_dump_emitted_vertex( feme->vinfo,
+                                   (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i );
+      }
+   }
+
+   /* The translated vertices sit at the front of hw_verts, so the draw
+    * starts at 0 regardless of 'start'.  Using draw_arrays avoids
+    * emitting an index list.
+    */
+   render->draw_arrays( render,
+                        0, /*start*/
+                        count );
+
+   render->release_vertices( render,
+                             hw_verts,
+                             feme->translate->key.output_stride,
+                             count );
+}
+
+
+/**
+ * Indexed variant of the linear path: fetch and translate 'count'
+ * vertices starting at 'start' into a freshly allocated hardware vertex
+ * buffer, then draw them through the supplied element list
+ * ('draw_elts', 'draw_count' entries) and release the buffer.
+ */
+static void fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
+                                        unsigned start,
+                                        unsigned count,
+                                        const ushort *draw_elts,
+                                        unsigned draw_count )
+{
+   struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+   struct draw_context *draw = feme->draw;
+   struct vbuf_render *render;
+   void *hw_verts;
+
+   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   render = draw->render;
+
+   hw_verts = render->allocate_vertices( render,
+                                         (ushort)feme->translate->key.output_stride,
+                                         (ushort)count );
+   if (!hw_verts) {
+      assert(0);
+      return;
+   }
+
+   /* One translate run both fetches the source vertices and writes the
+    * hardware vertices.
+    */
+   feme->translate->run( feme->translate, start, count, hw_verts );
+
+   /* Indexed draw using the caller's element list. */
+   render->draw( render, draw_elts, draw_count );
+
+   render->release_vertices( render,
+                             hw_verts,
+                             feme->translate->key.output_stride,
+                             count );
+}
+
+
+
static void fetch_emit_finish( struct draw_pt_middle_end *middle )
{
@@ -287,10 +400,12 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
return NULL;
}
- fetch_emit->base.prepare = fetch_emit_prepare;
- fetch_emit->base.run = fetch_emit_run;
- fetch_emit->base.finish = fetch_emit_finish;
- fetch_emit->base.destroy = fetch_emit_destroy;
+ fetch_emit->base.prepare = fetch_emit_prepare;
+ fetch_emit->base.run = fetch_emit_run;
+ fetch_emit->base.run_linear = fetch_emit_run_linear;
+ fetch_emit->base.run_linear_elts = fetch_emit_run_linear_elts;
+ fetch_emit->base.finish = fetch_emit_finish;
+ fetch_emit->base.destroy = fetch_emit_destroy;
fetch_emit->draw = draw;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
new file mode 100644
index 0000000000..fdf9b6fe6a
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -0,0 +1,406 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+
+#include "translate/translate.h"
+
+struct fetch_shade_emit;
+
+
+/* Prototype fetch, shade, emit-hw-verts all in one go.
+ *
+ * State for the middle end whose hooks are defined in this file: a
+ * varient key, the varient looked up from it, and the backend's vertex
+ * layout.
+ */
+struct fetch_shade_emit {
+ /* The hooks in this file cast the draw_pt_middle_end pointer they are
+ * given back to this struct, so base has to remain the first member.
+ */
+ struct draw_pt_middle_end base;
+ struct draw_context *draw;
+
+
+ /* Temporaries:
+ * NOTE(review): constants, pitch, src and prim are not referenced by
+ * any function in this file -- confirm whether they are still needed.
+ */
+ const float *constants;
+ unsigned pitch[PIPE_MAX_ATTRIBS];
+ const ubyte *src[PIPE_MAX_ATTRIBS];
+ unsigned prim;
+
+ /* key is filled in by fse_prepare(); active is the varient returned by
+ * draw_vs_lookup_varient() for that key.
+ */
+ struct draw_vs_varient_key key;
+ struct draw_vs_varient *active;
+
+
+ /* Result of render->get_vertex_info(), stored by fse_prepare(). */
+ const struct vertex_info *vinfo;
+};
+
+
+
+
+/* Middle-end prepare hook.
+ *
+ * Sets the backend primitive, rebuilds fse->key from the bound vertex
+ * elements and the backend's vertex_info, looks up the shader varient
+ * for that key, points the varient at the user vertex buffers and
+ * reports through *max_vertices how many vertices one backend vertex
+ * buffer can hold (rounded down to an even number).
+ *
+ * 'opt' is not used by this function.  On either failure path below the
+ * function returns without writing *max_vertices.
+ */
+static void fse_prepare( struct draw_pt_middle_end *middle,
+ unsigned prim,
+ unsigned opt,
+ unsigned *max_vertices )
+{
+ struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+ struct draw_context *draw = fse->draw;
+ unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
+ const struct vertex_info *vinfo;
+ unsigned i;
+
+
+ if (!draw->render->set_primitive( draw->render,
+ prim )) {
+ assert(0);
+ return;
+ }
+
+ /* Must do this after set_primitive() above:
+ */
+ fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render);
+
+
+
+ /* vinfo->size is multiplied by 4 to get a byte stride
+ * (presumably size counts 4-byte dwords -- confirm in draw_vertex.h).
+ */
+ fse->key.output_stride = vinfo->size * 4;
+ fse->key.nr_outputs = vinfo->num_attribs;
+ fse->key.nr_inputs = num_vs_inputs;
+
+ fse->key.nr_elements = MAX2(fse->key.nr_outputs, /* outputs - translate to hw format */
+ fse->key.nr_inputs); /* inputs - fetch from api format */
+
+ fse->key.viewport = !draw->identity_viewport;
+ fse->key.clip = !draw->bypass_clipping;
+ fse->key.pad = 0;
+
+ /* Clear every element slot that is in use before the in/out halves
+ * are filled in separately below.
+ */
+ memset(fse->key.element, 0,
+ fse->key.nr_elements * sizeof(fse->key.element[0]));
+
+ /* Input half of each element: where and in what format to fetch. */
+ for (i = 0; i < num_vs_inputs; i++) {
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
+ fse->key.element[i].in.format = src->src_format;
+
+ /* Consider ignoring these, ie make generated programs
+ * independent of this state:
+ */
+ fse->key.element[i].in.buffer = src->vertex_buffer_index;
+ fse->key.element[i].in.offset = src->src_offset;
+ }
+
+
+ /* Output half of each element: hw format, source vs output and the
+ * running byte offset inside the emitted vertex.
+ */
+ {
+ unsigned dst_offset = 0;
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ unsigned emit_sz = 0;
+
+ switch (vinfo->emit[i]) {
+ case EMIT_4F:
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_4UB:
+ emit_sz = 4 * sizeof(ubyte);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* The elements in the key correspond to vertex shader output
+ * numbers, not to positions in the hw vertex description --
+ * that's handled by the out.offset field.
+ */
+ fse->key.element[i].out.format = vinfo->emit[i];
+ fse->key.element[i].out.vs_output = vinfo->src_index[i];
+ fse->key.element[i].out.offset = dst_offset;
+
+ dst_offset += emit_sz;
+ assert(fse->key.output_stride >= dst_offset);
+ }
+ }
+
+
+ /* Look up the vertex shader varient that matches the key built
+ * above.
+ */
+ fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader,
+ &fse->key );
+
+ if (!fse->active) {
+ assert(0);
+ return ;
+ }
+
+ /* Now set buffer pointers:
+ */
+ for (i = 0; i < num_vs_inputs; i++) {
+ unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
+
+ fse->active->set_input( fse->active,
+ i,
+
+ ((const ubyte *) draw->pt.user.vbuffer[buf] +
+ draw->pt.vertex_buffer[buf].buffer_offset),
+
+ draw->pt.vertex_buffer[buf].pitch );
+ }
+
+ /* Divisor is the same vinfo->size * 4 stored in key.output_stride. */
+ *max_vertices = (draw->render->max_vertex_buffer_bytes /
+ (vinfo->size * 4));
+
+ /* Return an even number of verts.
+ * This prevents "parity" errors when splitting long triangle strips which
+ * can lead to front/back culling mix-ups.
+ * Every other triangle in a strip has an alternate front/back orientation
+ * so splitting at an odd position can cause the orientation of subsequent
+ * triangles to get reversed.
+ */
+ *max_vertices = *max_vertices & ~1;
+
+ /* Probably need to do this somewhere (or fix exec shader not to
+ * need it):
+ */
+ if (1) {
+ struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ vs->prepare(vs, draw);
+ }
+
+
+}
+
+
+
+
+
+
+
+/* Middle-end run_linear hook.
+ *
+ * Allocates 'count' hardware vertices, lets the active varient fill them
+ * from (start, count) and draws them through the backend's draw_arrays
+ * hook before releasing the allocation.
+ */
+static void fse_run_linear( struct draw_pt_middle_end *middle,
+                            unsigned start,
+                            unsigned count )
+{
+   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+   struct draw_context *draw = fse->draw;
+   char *verts;
+
+   /* XXX: is this flush needed to get prim_vbuf.c to release its
+    * allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   verts = draw->render->allocate_vertices( draw->render,
+                                            (ushort)fse->key.output_stride,
+                                            (ushort)count );
+   if (verts == NULL) {
+      assert(0);
+      return;
+   }
+
+   /* The varient fetches, shades and emits HW verts in one call.
+    * No clipping happens on this path.
+    */
+   fse->active->run_linear( fse->active, start, count, verts );
+
+   /* Non-indexed draw, so no index list has to be emitted.
+    */
+   draw->render->draw_arrays( draw->render, 0, count );
+
+   /* Debug dump of the emitted vertices, compiled but disabled. */
+   if (0) {
+      unsigned v;
+      for (v = 0; v < count; v++) {
+         debug_printf("\n\n%s vertex %d: (stride %d, offset %d)\n", __FUNCTION__, v,
+                      fse->key.output_stride,
+                      fse->key.output_stride * v);
+
+         draw_dump_emitted_vertex( fse->vinfo,
+                                   (const uint8_t *)verts + fse->key.output_stride * v );
+      }
+   }
+
+   draw->render->release_vertices( draw->render,
+                                   verts,
+                                   fse->key.output_stride,
+                                   count );
+}
+
+
+/* Middle-end run hook (indexed fetch).
+ *
+ * Allocates fetch_count hardware vertices, lets the active varient fill
+ * them from fetch_elts, and passes draw_elts/draw_count to the backend's
+ * indexed draw hook before releasing the allocation.
+ */
+static void
+fse_run(struct draw_pt_middle_end *middle,
+        const unsigned *fetch_elts,
+        unsigned fetch_count,
+        const ushort *draw_elts,
+        unsigned draw_count )
+{
+   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+   struct draw_context *draw = fse->draw;
+   void *verts;
+
+   /* XXX: is this flush needed to get prim_vbuf.c to release its
+    * allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   verts = draw->render->allocate_vertices( draw->render,
+                                            (ushort)fse->key.output_stride,
+                                            (ushort)fetch_count );
+   if (verts == NULL) {
+      assert(0);
+      return;
+   }
+
+   /* The varient fetches, shades and emits HW verts in one call.
+    */
+   fse->active->run_elts( fse->active, fetch_elts, fetch_count, verts );
+
+   draw->render->draw( draw->render, draw_elts, draw_count );
+
+   /* Debug dump of the emitted vertices, compiled but disabled. */
+   if (0) {
+      unsigned v;
+      for (v = 0; v < fetch_count; v++) {
+         debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, v);
+         draw_dump_emitted_vertex( fse->vinfo,
+                                   (const uint8_t *)verts +
+                                   fse->key.output_stride * v );
+      }
+   }
+
+   draw->render->release_vertices( draw->render,
+                                   verts,
+                                   fse->key.output_stride,
+                                   fetch_count );
+}
+
+
+
+/* Middle-end run_linear_elts hook.
+ *
+ * Same vertex generation as fse_run_linear() -- the active varient is
+ * run over (start, count) -- but the result is drawn through the
+ * backend's indexed draw hook using draw_elts/draw_count.
+ */
+static void fse_run_linear_elts( struct draw_pt_middle_end *middle,
+                                 unsigned start,
+                                 unsigned count,
+                                 const ushort *draw_elts,
+                                 unsigned draw_count )
+{
+   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+   struct draw_context *draw = fse->draw;
+   char *verts;
+
+   /* XXX: is this flush needed to get prim_vbuf.c to release its
+    * allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   verts = draw->render->allocate_vertices( draw->render,
+                                            (ushort)fse->key.output_stride,
+                                            (ushort)count );
+   if (verts == NULL) {
+      assert(0);
+      return;
+   }
+
+   /* The varient fetches, shades and emits HW verts in one call.
+    * No clipping happens on this path.
+    */
+   fse->active->run_linear( fse->active, start, count, verts );
+
+   draw->render->draw( draw->render, draw_elts, draw_count );
+
+   draw->render->release_vertices( draw->render,
+                                   verts,
+                                   fse->key.output_stride,
+                                   count );
+}
+
+
+
+/* Middle-end finish hook: intentionally empty, nothing is done here. */
+static void fse_finish( struct draw_pt_middle_end *middle )
+{
+}
+
+
+/* Middle-end destroy hook: frees the object passed in.  No other
+ * resources are released here.
+ */
+static void
+fse_destroy( struct draw_pt_middle_end *middle )
+{
+ FREE(middle);
+}
+
+/* Create a fetch-shade-emit middle end bound to 'draw'.
+ *
+ * Returns a pointer to the embedded draw_pt_middle_end, or NULL when the
+ * allocation fails.
+ */
+struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw )
+{
+   struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit);
+
+   if (fse == NULL) {
+      return NULL;
+   }
+
+   fse->draw = draw;
+
+   /* Install the hooks defined in this file. */
+   fse->base.destroy         = fse_destroy;
+   fse->base.finish          = fse_finish;
+   fse->base.run_linear_elts = fse_run_linear_elts;
+   fse->base.run_linear      = fse_run_linear;
+   fse->base.run             = fse_run;
+   fse->base.prepare         = fse_prepare;
+
+   return &fse->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 4ec20493c4..0aec4b71ba 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -51,11 +51,12 @@ struct fetch_pipeline_middle_end {
static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned prim,
- unsigned opt )
+ unsigned opt,
+ unsigned *max_vertices )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *vs = draw->vertex_shader;
+ struct draw_vertex_shader *vs = draw->vs.vertex_shader;
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
@@ -81,24 +82,33 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- draw->bypass_clipping,
- draw->identity_viewport,
- draw->rasterizer->gl_rasterization_rules );
+ (boolean)draw->bypass_clipping,
+ (boolean)draw->identity_viewport,
+ (boolean)draw->rasterizer->gl_rasterization_rules );
- if (!(opt & PT_PIPELINE))
+ if (!(opt & PT_PIPELINE)) {
draw_pt_emit_prepare( fpme->emit,
- prim );
+ prim,
+ max_vertices );
+
+ *max_vertices = MAX2( *max_vertices,
+ DRAW_PIPE_MAX_VERTICES );
+ }
+ else {
+ *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ }
+
+ /* return even number */
+ *max_vertices = *max_vertices & ~1;
/* No need to prepare the shader.
*/
vs->prepare(vs, draw);
-
}
-
static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
@@ -107,7 +117,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *shader = draw->vertex_shader;
+ struct draw_vertex_shader *shader = draw->vs.vertex_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align_int( fetch_count, 4 );
@@ -162,7 +172,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
fpme->vertex_size,
draw_elts,
draw_count );
- }
+ }
else {
draw_pt_emit( fpme->emit,
(const float (*)[4])pipeline_verts->data,
@@ -177,6 +187,157 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
}
+/* Middle-end run_linear hook.
+ *
+ * Fetches (start, count) into a temporary buffer of pipeline vertices,
+ * runs the vertex shader when PT_SHADE is set, runs the post-VS step and
+ * then either sends the vertices down the draw pipeline or emits them
+ * directly.  The temporary buffer is freed before returning.
+ */
+static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
+                                       unsigned start,
+                                       unsigned count)
+{
+   struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+   struct draw_context *draw = fpme->draw;
+   struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+   unsigned flags = fpme->opt;
+   unsigned padded_count = align_int( count, 4 );
+   struct vertex_header *verts;
+
+   verts = (struct vertex_header *)MALLOC(fpme->vertex_size * padded_count);
+   if (verts == NULL) {
+      /* Nothing useful can be done without the buffer - skip this
+       * rendering.
+       */
+      assert(0);
+      return;
+   }
+
+   /* Fill the temporary buffer from the bound vertex data.
+    */
+   draw_pt_fetch_run_linear( fpme->fetch, start, count, (char *)verts );
+
+   /* Shading is done in place: the shader reads and overwrites the
+    * data[] part of each pipeline vertex.  Without PT_SHADE the
+    * fetched inputs are used as they are.
+    */
+   if (flags & PT_SHADE) {
+      vs->run_linear(vs,
+                     (const float (*)[4])verts->data,
+                     (      float (*)[4])verts->data,
+                     (const float (*)[4])draw->pt.user.constants,
+                     count,
+                     fpme->vertex_size,
+                     fpme->vertex_size);
+   }
+
+   /* A true result from the post-VS step forces the pipeline path.
+    */
+   if (draw_pt_post_vs_run( fpme->post_vs, verts, count, fpme->vertex_size )) {
+      flags |= PT_PIPELINE;
+   }
+
+   if (flags & PT_PIPELINE) {
+      draw_pipeline_run_linear( fpme->draw,
+                                fpme->prim,
+                                verts,
+                                count,
+                                fpme->vertex_size);
+   }
+   else {
+      draw_pt_emit_linear( fpme->emit,
+                           (const float (*)[4])verts->data,
+                           count,
+                           fpme->vertex_size,
+                           0, /*start*/
+                           count );
+   }
+
+   FREE(verts);
+}
+
+
+
+/* Middle-end run_linear_elts hook.
+ *
+ * Fetches (start, count) into a temporary buffer of pipeline vertices,
+ * runs the vertex shader when PT_SHADE is set, runs the post-VS step and
+ * then passes the vertices together with draw_elts/draw_count either to
+ * the draw pipeline or to the emit path.  The temporary buffer is freed
+ * before returning.
+ */
+static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
+                                            unsigned start,
+                                            unsigned count,
+                                            const ushort *draw_elts,
+                                            unsigned draw_count )
+{
+   struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+   struct draw_context *draw = fpme->draw;
+   struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+   unsigned flags = fpme->opt;
+   unsigned padded_count = align_int( count, 4 );
+   struct vertex_header *verts;
+
+   verts = (struct vertex_header *)MALLOC(fpme->vertex_size * padded_count);
+   if (verts == NULL) {
+      /* Nothing useful can be done without the buffer - skip this
+       * rendering.
+       */
+      assert(0);
+      return;
+   }
+
+   /* Fill the temporary buffer from the bound vertex data.
+    */
+   draw_pt_fetch_run_linear( fpme->fetch, start, count, (char *)verts );
+
+   /* Shading is done in place: the shader reads and overwrites the
+    * data[] part of each pipeline vertex.  Without PT_SHADE the
+    * fetched inputs are used as they are.
+    */
+   if (flags & PT_SHADE) {
+      vs->run_linear(vs,
+                     (const float (*)[4])verts->data,
+                     (      float (*)[4])verts->data,
+                     (const float (*)[4])draw->pt.user.constants,
+                     count,
+                     fpme->vertex_size,
+                     fpme->vertex_size);
+   }
+
+   /* A true result from the post-VS step forces the pipeline path.
+    */
+   if (draw_pt_post_vs_run( fpme->post_vs, verts, count, fpme->vertex_size )) {
+      flags |= PT_PIPELINE;
+   }
+
+   if (flags & PT_PIPELINE) {
+      draw_pipeline_run( fpme->draw,
+                         fpme->prim,
+                         verts,
+                         count,
+                         fpme->vertex_size,
+                         draw_elts,
+                         draw_count );
+   }
+   else {
+      draw_pt_emit( fpme->emit,
+                    (const float (*)[4])verts->data,
+                    count,
+                    fpme->vertex_size,
+                    draw_elts,
+                    draw_count );
+   }
+
+   FREE(verts);
+}
+
+
static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
{
@@ -206,10 +367,12 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
if (!fpme)
goto fail;
- fpme->base.prepare = fetch_pipeline_prepare;
- fpme->base.run = fetch_pipeline_run;
- fpme->base.finish = fetch_pipeline_finish;
- fpme->base.destroy = fetch_pipeline_destroy;
+ fpme->base.prepare = fetch_pipeline_prepare;
+ fpme->base.run = fetch_pipeline_run;
+ fpme->base.run_linear = fetch_pipeline_linear_run;
+ fpme->base.run_linear_elts = fetch_pipeline_linear_run_elts;
+ fpme->base.finish = fetch_pipeline_finish;
+ fpme->base.destroy = fetch_pipeline_destroy;
fpme->draw = draw;
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index c4a67c8289..af6306b1c6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -100,16 +100,19 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
struct vertex_header *out = vertices;
const float *scale = pvs->draw->viewport.scale;
const float *trans = pvs->draw->viewport.translate;
+ const unsigned pos = pvs->draw->vs.position_output;
unsigned clipped = 0;
unsigned j;
if (0) debug_printf("%s\n");
for (j = 0; j < count; j++) {
- out->clip[0] = out->data[0][0];
- out->clip[1] = out->data[0][1];
- out->clip[2] = out->data[0][2];
- out->clip[3] = out->data[0][3];
+ float *position = out->data[pos];
+
+ out->clip[0] = position[0];
+ out->clip[1] = position[1];
+ out->clip[2] = position[2];
+ out->clip[3] = position[3];
out->vertex_id = 0xffff;
out->clipmask = compute_clipmask_gl(out->clip,
@@ -120,19 +123,19 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
if (out->clipmask == 0)
{
/* divide by w */
- float w = 1.0f / out->data[0][3];
+ float w = 1.0f / position[3];
/* Viewport mapping */
- out->data[0][0] = out->data[0][0] * w * scale[0] + trans[0];
- out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1];
- out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2];
- out->data[0][3] = w;
+ position[0] = position[0] * w * scale[0] + trans[0];
+ position[1] = position[1] * w * scale[1] + trans[1];
+ position[2] = position[2] * w * scale[2] + trans[2];
+ position[3] = w;
#if 0
debug_printf("post viewport: %f %f %f %f\n",
- out->data[0][0],
- out->data[0][1],
- out->data[0][2],
- out->data[0][3]);
+ position[0],
+ position[1],
+ position[2],
+ position[3]);
#endif
}
@@ -154,15 +157,18 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs,
struct vertex_header *out = vertices;
const float *scale = pvs->draw->viewport.scale;
const float *trans = pvs->draw->viewport.translate;
+ const unsigned pos = pvs->draw->vs.position_output;
unsigned j;
if (0) debug_printf("%s\n", __FUNCTION__);
for (j = 0; j < count; j++) {
+ float *position = out->data[pos];
+
/* Viewport mapping only, no cliptest/rhw divide
*/
- out->data[0][0] = out->data[0][0] * scale[0] + trans[0];
- out->data[0][1] = out->data[0][1] * scale[1] + trans[1];
- out->data[0][2] = out->data[0][2] * scale[2] + trans[2];
+ position[0] = position[0] * scale[0] + trans[0];
+ position[1] = position[1] * scale[1] + trans[1];
+ position[2] = position[2] * scale[2] + trans[2];
out = (struct vertex_header *)((char *)out + stride);
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
new file mode 100644
index 0000000000..32c8a9632c
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -0,0 +1,103 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+/* Report, for primitive type 'prim', the two values used when splitting
+ * a vertex run: *first receives the number of vertices in the first
+ * primitive and *incr the number of vertices each further primitive
+ * adds.  An unknown 'prim' asserts and yields first = 0, incr = 1.
+ */
+void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
+{
+   unsigned min_verts;
+   unsigned step;
+
+   switch (prim) {
+   case PIPE_PRIM_POINTS:
+      min_verts = 1;
+      step = 1;
+      break;
+
+   case PIPE_PRIM_LINES:
+      min_verts = 2;
+      step = 2;
+      break;
+
+   case PIPE_PRIM_LINE_STRIP:
+   case PIPE_PRIM_LINE_LOOP:
+      min_verts = 2;
+      step = 1;
+      break;
+
+   case PIPE_PRIM_TRIANGLES:
+      min_verts = 3;
+      step = 3;
+      break;
+
+   case PIPE_PRIM_TRIANGLE_STRIP:
+   case PIPE_PRIM_TRIANGLE_FAN:
+   case PIPE_PRIM_POLYGON:
+      min_verts = 3;
+      step = 1;
+      break;
+
+   case PIPE_PRIM_QUADS:
+      min_verts = 4;
+      step = 4;
+      break;
+
+   case PIPE_PRIM_QUAD_STRIP:
+      min_verts = 4;
+      step = 2;
+      break;
+
+   default:
+      assert(0);
+      min_verts = 0;
+      step = 1; /* one, so that count % incr stays well defined */
+      break;
+   }
+
+   *first = min_verts;
+   *incr = step;
+}
+
+
+/* Map primitive type 'prim' to its reduced type: points stay points,
+ * every line type becomes PIPE_PRIM_LINES and every triangle, quad or
+ * polygon type becomes PIPE_PRIM_TRIANGLES.  An unknown 'prim' asserts
+ * and falls back to PIPE_PRIM_POINTS.
+ */
+unsigned draw_pt_reduced_prim(unsigned prim)
+{
+   unsigned reduced;
+
+   switch (prim) {
+   case PIPE_PRIM_POINTS:
+      reduced = PIPE_PRIM_POINTS;
+      break;
+
+   case PIPE_PRIM_LINES:
+   case PIPE_PRIM_LINE_STRIP:
+   case PIPE_PRIM_LINE_LOOP:
+      reduced = PIPE_PRIM_LINES;
+      break;
+
+   case PIPE_PRIM_TRIANGLES:
+   case PIPE_PRIM_TRIANGLE_STRIP:
+   case PIPE_PRIM_TRIANGLE_FAN:
+   case PIPE_PRIM_POLYGON:
+   case PIPE_PRIM_QUADS:
+   case PIPE_PRIM_QUAD_STRIP:
+      reduced = PIPE_PRIM_TRIANGLES;
+      break;
+
+   default:
+      assert(0);
+      reduced = PIPE_PRIM_POINTS;
+      break;
+   }
+
+   return reduced;
+}
+
+
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index 355093f945..46e722a154 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -31,7 +31,7 @@
#include "draw/draw_pt.h"
#define FETCH_MAX 256
-#define DRAW_MAX (16*FETCH_MAX)
+#define DRAW_MAX (FETCH_MAX+8)
struct varray_frontend {
struct draw_pt_front_end base;
@@ -40,8 +40,8 @@ struct varray_frontend {
ushort draw_elts[DRAW_MAX];
unsigned fetch_elts[FETCH_MAX];
- unsigned draw_count;
- unsigned fetch_count;
+ unsigned driver_fetch_max;
+ unsigned fetch_max;
struct draw_pt_middle_end *middle;
@@ -49,147 +49,86 @@ struct varray_frontend {
unsigned output_prim;
};
-static void varray_flush(struct varray_frontend *varray)
-{
- if (varray->draw_count) {
-#if 0
- debug_printf("FLUSH fc = %d, dc = %d\n",
- varray->fetch_count,
- varray->draw_count);
-#endif
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- varray->fetch_count,
- varray->draw_elts,
- varray->draw_count);
- }
-
- varray->fetch_count = 0;
- varray->draw_count = 0;
-}
-#if 0
-static void varray_check_flush(struct varray_frontend *varray)
+static void varray_flush_linear(struct varray_frontend *varray,
+ unsigned start, unsigned count)
{
- if (varray->draw_count + 6 >= DRAW_MAX/* ||
- varray->fetch_count + 4 >= FETCH_MAX*/) {
- varray_flush(varray);
+ if (count) {
+ assert(varray->middle->run_linear);
+ varray->middle->run_linear(varray->middle, start, count);
}
}
-#endif
-static INLINE void add_draw_el(struct varray_frontend *varray,
- int idx, ushort flags)
+static void varray_line_loop_segment(struct varray_frontend *varray,
+ unsigned start,
+ unsigned segment_start,
+ unsigned segment_count,
+ boolean end )
{
- varray->draw_elts[varray->draw_count++] = idx | flags;
-}
+ assert(segment_count+1 < varray->fetch_max);
+ if (segment_count >= 1) {
+ unsigned nr = 0, i;
+ for (i = 0; i < segment_count; i++)
+ varray->fetch_elts[nr++] = start + segment_start + i;
-static INLINE void varray_triangle( struct varray_frontend *varray,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- add_draw_el(varray, i0, 0);
- add_draw_el(varray, i1, 0);
- add_draw_el(varray, i2, 0);
-}
+ if (end)
+ varray->fetch_elts[nr++] = start;
-static INLINE void varray_triangle_flags( struct varray_frontend *varray,
- ushort flags,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- add_draw_el(varray, i0, flags);
- add_draw_el(varray, i1, 0);
- add_draw_el(varray, i2, 0);
-}
+ assert(nr < FETCH_MAX);
-static INLINE void varray_line( struct varray_frontend *varray,
- unsigned i0,
- unsigned i1 )
-{
- add_draw_el(varray, i0, 0);
- add_draw_el(varray, i1, 0);
+ varray->middle->run(varray->middle,
+ varray->fetch_elts,
+ nr,
+ varray->draw_elts, /* ie. linear */
+ nr);
+ }
}
-static INLINE void varray_line_flags( struct varray_frontend *varray,
- ushort flags,
- unsigned i0,
- unsigned i1 )
-{
- add_draw_el(varray, i0, flags);
- add_draw_el(varray, i1, 0);
-}
-
-static INLINE void varray_point( struct varray_frontend *varray,
- unsigned i0 )
+static void varray_fan_segment(struct varray_frontend *varray,
+ unsigned start,
+ unsigned segment_start,
+ unsigned segment_count )
{
- add_draw_el(varray, i0, 0);
-}
+ assert(segment_count+1 < varray->fetch_max);
+ if (segment_count >= 2) {
+ unsigned nr = 0, i;
-static INLINE void varray_quad( struct varray_frontend *varray,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
-{
- varray_triangle( varray, i0, i1, i3 );
- varray_triangle( varray, i1, i2, i3 );
-}
+ if (segment_start != 0)
+ varray->fetch_elts[nr++] = start;
-static INLINE void varray_ef_quad( struct varray_frontend *varray,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
-{
- const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
- const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
+ for (i = 0 ; i < segment_count; i++)
+ varray->fetch_elts[nr++] = start + segment_start + i;
- varray_triangle_flags( varray,
- DRAW_PIPE_RESET_STIPPLE | omitEdge1,
- i0, i1, i3 );
+ assert(nr < FETCH_MAX);
- varray_triangle_flags( varray,
- omitEdge2,
- i1, i2, i3 );
+ varray->middle->run(varray->middle,
+ varray->fetch_elts,
+ nr,
+ varray->draw_elts, /* ie. linear */
+ nr);
+ }
}
-/* At least for now, we're back to using a template include file for
- * this. The two paths aren't too different though - it may be
- * possible to reunify them.
- */
-#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle_flags(vc,flags,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) varray_ef_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) varray_line_flags(vc,flags,i0,i1)
-#define POINT(vc,i0) varray_point(vc,i0)
-#define FUNC varray_run_extras
-#include "draw_pt_varray_tmp.h"
-
-#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle(vc,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) varray_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1)
-#define POINT(vc,i0) varray_point(vc,i0)
-#define FUNC varray_run
-#include "draw_pt_varray_tmp.h"
-static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
+#define FUNC varray_run
+#include "draw_pt_varray_tmp_linear.h"
+
+static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = {
PIPE_PRIM_POINTS,
PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
+ PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */
+ PIPE_PRIM_LINE_STRIP,
PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES
+ PIPE_PRIM_TRIANGLE_STRIP,
+ PIPE_PRIM_TRIANGLE_FAN,
+ PIPE_PRIM_QUADS,
+ PIPE_PRIM_QUAD_STRIP,
+ PIPE_PRIM_POLYGON
};
@@ -201,20 +140,18 @@ static void varray_prepare(struct draw_pt_front_end *frontend,
{
struct varray_frontend *varray = (struct varray_frontend *)frontend;
- if (opt & PT_PIPELINE)
- {
- varray->base.run = varray_run_extras;
- }
- else
- {
- varray->base.run = varray_run;
- }
+ varray->base.run = varray_run;
varray->input_prim = prim;
- varray->output_prim = reduced_prim[prim];
+ varray->output_prim = decompose_prim[prim];
varray->middle = middle;
- middle->prepare(middle, varray->output_prim, opt);
+ middle->prepare(middle, varray->output_prim, opt, &varray->driver_fetch_max );
+
+ /* check that the max is even */
+ assert((varray->driver_fetch_max & 1) == 0);
+
+ varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max);
}
@@ -235,6 +172,7 @@ static void varray_destroy(struct draw_pt_front_end *frontend)
struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
{
+ ushort i;
struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
if (varray == NULL)
return NULL;
@@ -245,5 +183,9 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
varray->base.destroy = varray_destroy;
varray->draw = draw;
+ for (i = 0; i < DRAW_MAX; i++) {
+ varray->draw_elts[i] = i;
+ }
+
return &varray->base;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index b9a319b253..7c722457c3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -10,32 +10,45 @@ static void FUNC(struct draw_pt_front_end *frontend,
boolean flatfirst = (draw->rasterizer->flatshade &&
draw->rasterizer->flatshade_first);
- unsigned i, flags;
+ unsigned i, j;
+ ushort flags;
+ unsigned first, incr;
+
+ varray->fetch_start = start;
+
+ draw_pt_split_prim(varray->input_prim, &first, &incr);
#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count);
-#endif
-#if 0
- debug_printf("INPUT PRIM = %d (start = %d, count = %d)\n", varray->input_prim,
+ debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
+ varray->input_prim,
start, count);
#endif
- for (i = 0; i < count; ++i) {
- varray->fetch_elts[i] = start + i;
- }
- varray->fetch_count = count;
-
switch (varray->input_prim) {
case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- POINT(varray, i + 0);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i < end; i++) {
+ POINT(varray, i + 0);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE(varray, DRAW_PIPE_RESET_STIPPLE,
- i + 0, i + 1);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+1 < end; i += 2) {
+ LINE(varray, DRAW_PIPE_RESET_STIPPLE,
+ i + 0, i + 1);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
@@ -43,38 +56,81 @@ static void FUNC(struct draw_pt_front_end *frontend,
if (count >= 2) {
flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 1; i < end; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ LINE(varray, flags, i - 1, 0);
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
- LINE(varray, flags, i - 1, 0);
}
break;
case PIPE_PRIM_LINE_STRIP:
flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 1; i < end; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1, i + 2);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i += 3) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0, i + 1, i + 2);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
case PIPE_PRIM_TRIANGLE_STRIP:
if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1 + (i&1), i + 2 - (i&1));
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0, i + 1 + (i&1), i + 2 - (i&1));
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
}
}
else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0 + (i&1), i + 1 - (i&1), i + 2);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i + 2 < end; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0 + (i&1), i + 1 - (i&1), i + 2);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
}
}
break;
@@ -83,51 +139,89 @@ static void FUNC(struct draw_pt_front_end *frontend,
if (count >= 3) {
if (flatfirst) {
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (i = 0; i+2 < count; i++) {
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
}
else {
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (i = 0; i+2 < count; i++) {
- TRIANGLE(varray, flags, 0, i + 1, i + 2);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, flags, 0, i + 1, i + 2);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
}
}
break;
case PIPE_PRIM_QUADS:
- for (i = 0; i+3 < count; i += 4) {
- QUAD(varray, i + 0, i + 1, i + 2, i + 3);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+3 < end; i += 4) {
+ QUAD(varray, i + 0, i + 1, i + 2, i + 3);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
case PIPE_PRIM_QUAD_STRIP:
- for (i = 0; i+3 < count; i += 2) {
- QUAD(varray, i + 2, i + 0, i + 1, i + 3);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+3 < end; i += 2) {
+ QUAD(varray, i + 2, i + 0, i + 1, i + 3);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
}
break;
case PIPE_PRIM_POLYGON:
{
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
-
- for (i = 0; i+2 < count; i++, flags = edge_middle) {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++, flags = edge_middle) {
if (i + 3 == count)
flags |= edge_last;
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
- break;
+ }
+ break;
default:
assert(0);
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
new file mode 100644
index 0000000000..55a8e6521d
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -0,0 +1,91 @@
+/**
+ * Round a vertex count down to a whole number of primitives.
+ *
+ * \param count  number of vertices supplied
+ * \param first  number of vertices needed for the first primitive
+ * \param incr   number of vertices needed for each further primitive
+ * \return the largest value <= count of the form first + k * incr, or 0
+ *         when count is too small to form even one primitive.
+ */
+static unsigned trim( unsigned count, unsigned first, unsigned incr )
+{
+   /* Guard before subtracting: in unsigned arithmetic (count - first)
+    * wraps around when count < first, and the result could come out as
+    * a huge bogus count (e.g. trim(0, 3, 3)) that passes the caller's
+    * "count < first" check.
+    */
+   if (count < first)
+      return 0;
+
+   return count - (count - first) % incr;
+}
+
+/**
+ * Linear (non-indexed) run function: cuts the vertex range into batches
+ * and passes each batch to one of the varray flush helpers.
+ *
+ * \param frontend  cast to struct varray_frontend below
+ * \param get_elt   not referenced in this function
+ * \param elts      not dereferenced; its value is converted to the index
+ *                  of the first vertex
+ * \param count     number of vertices, trimmed below to whole primitives
+ */
+static void FUNC(struct draw_pt_front_end *frontend,
+ pt_elt_func get_elt,
+ const void *elts,
+ unsigned count)
+{
+ struct varray_frontend *varray = (struct varray_frontend *)frontend;
+ unsigned start = (unsigned)elts;
+
+ unsigned j;
+ unsigned first, incr;
+
+ draw_pt_split_prim(varray->input_prim, &first, &incr);
+
+ /* Sanitize primitive length:
+ */
+ count = trim(count, first, incr);
+ if (count < first)
+ return;
+
+#if 0
+ debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
+ varray->input_prim,
+ start, count);
+#endif
+
+ switch (varray->input_prim) {
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ /* Each pass flushes nr vertices. When more remain, j is stepped
+ * back by (first - incr), so the next batch starts that many
+ * vertices before the end of this one (zero when first == incr).
+ */
+ for (j = 0; j < count;) {
+ unsigned remaining = count - j;
+ unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
+ varray_flush_linear(varray, start + j, nr);
+ j += nr;
+ if (nr != remaining)
+ j -= (first - incr);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ /* Always have to decompose as we've stated that this will be
+ * emitted as a line-strip.
+ */
+ /* The batch limit here is fetch_max-1 rather than driver_fetch_max;
+ * presumably one slot is kept for the loop-closing vertex -- TODO
+ * confirm against varray_line_loop_segment(). The last argument is
+ * true only for the final batch.
+ */
+ for (j = 0; j < count;) {
+ unsigned remaining = count - j;
+ unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
+ varray_line_loop_segment(varray, start, j, nr, nr == remaining);
+ j += nr;
+ if (nr != remaining)
+ j -= (first - incr);
+ }
+ break;
+
+
+ case PIPE_PRIM_POLYGON:
+ case PIPE_PRIM_TRIANGLE_FAN:
+ /* Small enough draws go out in a single flush; larger ones are cut
+ * into segments of at most fetch_max-1 vertices.
+ */
+ if (count < varray->driver_fetch_max) {
+ varray_flush_linear(varray, start, count);
+ }
+ else {
+ for ( j = 0; j < count;) {
+ unsigned remaining = count - j;
+ unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
+ varray_fan_segment(varray, start, j, nr);
+ j += nr;
+ if (nr != remaining)
+ j -= (first - incr);
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+}
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 6b3fb1406b..cda2987c9e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -36,8 +36,8 @@
#include "draw/draw_pt.h"
-#define CACHE_MAX 32
-#define FETCH_MAX 128
+#define CACHE_MAX 256
+#define FETCH_MAX 256
#define DRAW_MAX (16*1024)
struct vcache_frontend {
@@ -52,15 +52,27 @@ struct vcache_frontend {
unsigned draw_count;
unsigned fetch_count;
+ unsigned fetch_max;
struct draw_pt_middle_end *middle;
unsigned input_prim;
unsigned output_prim;
+
+ unsigned middle_prim;
+ unsigned opt;
};
static void vcache_flush( struct vcache_frontend *vcache )
{
+ if (vcache->middle_prim != vcache->output_prim) {
+ vcache->middle_prim = vcache->output_prim;
+ vcache->middle->prepare( vcache->middle,
+ vcache->middle_prim,
+ vcache->opt,
+ &vcache->fetch_max );
+ }
+
if (vcache->draw_count) {
vcache->middle->run( vcache->middle,
vcache->fetch_elts,
@@ -171,15 +183,15 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
unsigned i2,
unsigned i3 )
{
- const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
- const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
-
- vcache_triangle_flags( vcache,
- DRAW_PIPE_RESET_STIPPLE | omitEdge1,
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2 ),
i0, i1, i3 );
- vcache_triangle_flags( vcache,
- omitEdge2,
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1 ),
i1, i2, i3 );
}
@@ -201,21 +213,201 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
#define FUNC vcache_run
#include "draw_pt_vcache_tmp.h"
+/* Copy count 32-bit indices to dest as ushorts, adding delta to each
+ * one before the narrowing cast.
+ */
+static void rebase_uint_elts( const unsigned *src,
+                              unsigned count,
+                              int delta,
+                              ushort *dest )
+{
+   const unsigned *stop = src + count;
+
+   while (src != stop)
+      *dest++ = (ushort)(*src++ + delta);
+}
+/* Copy count 16-bit indices to dest, adding delta to each one before
+ * the narrowing cast.
+ */
+static void rebase_ushort_elts( const ushort *src,
+                                unsigned count,
+                                int delta,
+                                ushort *dest )
+{
+   const ushort *stop = src + count;
+
+   while (src != stop)
+      *dest++ = (ushort)(*src++ + delta);
+}
+
+/* Copy count 8-bit indices to dest as ushorts, adding delta to each
+ * one before the narrowing cast.
+ */
+static void rebase_ubyte_elts( const ubyte *src,
+                               unsigned count,
+                               int delta,
+                               ushort *dest )
+{
+   const ubyte *stop = src + count;
+
+   while (src != stop)
+      *dest++ = (ushort)(*src++ + delta);
+}
+
+
+
+/* Narrow count 32-bit indices to ushorts; values are cast, not
+ * range-checked.
+ */
+static void translate_uint_elts( const unsigned *src,
+                                 unsigned count,
+                                 ushort *dest )
+{
+   const unsigned *stop = src + count;
+
+   while (src != stop)
+      *dest++ = (ushort)(*src++);
+}
+
+/* Copy count 16-bit indices from src to dest unchanged.
+ */
+static void translate_ushort_elts( const ushort *src,
+                                   unsigned count,
+                                   ushort *dest )
+{
+   const ushort *stop = src + count;
+
+   while (src != stop)
+      *dest++ = (ushort)(*src++);
+}
+
+/* Widen count 8-bit indices to ushorts.
+ */
+static void translate_ubyte_elts( const ubyte *src,
+                                  unsigned count,
+                                  ushort *dest )
+{
+   const ubyte *stop = src + count;
+
+   while (src != stop)
+      *dest++ = (ushort)(*src++);
+}
+
+
+
+
+#if 0
+/* Compiled out. Maps an element size of 1, 2 or 4 to an R8/R16/R32
+ * UNORM format and anything else to PIPE_FORMAT_NONE. Note that the
+ * body reads draw, which is neither a parameter nor a local here, and
+ * that the get_elt argument is not used.
+ */
+static enum pipe_format format_from_get_elt( pt_elt_func get_elt )
+{
+ switch (draw->pt.user.eltSize) {
+ case 1: return PIPE_FORMAT_R8_UNORM;
+ case 2: return PIPE_FORMAT_R16_UNORM;
+ case 4: return PIPE_FORMAT_R32_UNORM;
+ default: return PIPE_FORMAT_NONE;
+ }
+}
+#endif
+
+/**
+ * Run hook for indexed draws.  When the referenced vertex range
+ * [min_index, max_index] is small enough, the whole draw is handed to
+ * the middle end in one run_linear_elts() call with the indices
+ * converted to ushorts relative to min_index; otherwise the draw is
+ * passed on to vcache_run().
+ *
+ * \param frontend    cast to struct vcache_frontend below
+ * \param get_elt     only forwarded to vcache_run() on the fallback path
+ * \param elts        index buffer; element size comes from
+ *                    draw->pt.user.eltSize
+ * \param draw_count  number of indices
+ */
+static void vcache_check_run( struct draw_pt_front_end *frontend,
+                              pt_elt_func get_elt,
+                              const void *elts,
+                              unsigned draw_count )
+{
+   struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
+   struct draw_context *draw = vcache->draw;
+   unsigned min_index = draw->pt.user.min_index;
+   unsigned max_index = draw->pt.user.max_index;
+   unsigned index_size = draw->pt.user.eltSize;
+   unsigned fetch_count = max_index + 1 - min_index;
+   const ushort *transformed_elts;
+   ushort *storage = NULL;
+
+
+   if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
+                       vcache->fetch_max,
+                       draw_count);
+
+   /* Fall back when max_index is the 0xffffffff sentinel, when the
+    * range does not fit under fetch_max, or when the range is larger
+    * than the number of indices drawn.
+    */
+   if (max_index == 0xffffffff ||
+       fetch_count >= vcache->fetch_max ||
+       fetch_count > draw_count) {
+      if (0) debug_printf("fail\n");
+      goto fail;
+   }
+
+   /* This path feeds the middle end the input primitive, so re-run
+    * prepare if it was last prepared for a different primitive.
+    */
+   if (vcache->middle_prim != vcache->input_prim) {
+      vcache->middle_prim = vcache->input_prim;
+      vcache->middle->prepare( vcache->middle,
+                               vcache->middle_prim,
+                               vcache->opt,
+                               &vcache->fetch_max );
+   }
+
+
+   if (min_index == 0 &&
+       index_size == 2)
+   {
+      /* Already zero-based ushorts: use the caller's buffer directly. */
+      transformed_elts = (const ushort *)elts;
+   }
+   else
+   {
+      storage = MALLOC( draw_count * sizeof(ushort) );
+      if (!storage)
+         goto fail;
+
+      if (min_index == 0) {
+         switch(index_size) {
+         case 1:
+            translate_ubyte_elts( (const ubyte *)elts,
+                                  draw_count,
+                                  storage );
+            break;
+
+         case 2:
+            translate_ushort_elts( (const ushort *)elts,
+                                   draw_count,
+                                   storage );
+            break;
+
+         case 4:
+            translate_uint_elts( (const uint *)elts,
+                                 draw_count,
+                                 storage );
+            break;
+
+         default:
+            assert(0);
+            /* Do not leak the temporary index buffer on this exit. */
+            FREE(storage);
+            return;
+         }
+      }
+      else {
+         switch(index_size) {
+         case 1:
+            rebase_ubyte_elts( (const ubyte *)elts,
+                               draw_count,
+                               0 - (int)min_index,
+                               storage );
+            break;
+
+         case 2:
+            rebase_ushort_elts( (const ushort *)elts,
+                                draw_count,
+                                0 - (int)min_index,
+                                storage );
+            break;
+
+         case 4:
+            rebase_uint_elts( (const uint *)elts,
+                              draw_count,
+                              0 - (int)min_index,
+                              storage );
+            break;
+
+         default:
+            assert(0);
+            /* Do not leak the temporary index buffer on this exit. */
+            FREE(storage);
+            return;
+         }
+      }
+      transformed_elts = storage;
+   }
+
+   vcache->middle->run_linear_elts( vcache->middle,
+                                    min_index, /* start */
+                                    fetch_count,
+                                    transformed_elts,
+                                    draw_count );
+
+   /* storage is still NULL when the caller's buffer was used directly. */
+   FREE(storage);
+   return;
+
+ fail:
+   vcache_run( frontend, get_elt, elts, draw_count );
+}
-static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
- PIPE_PRIM_POINTS,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES
-};
@@ -232,14 +424,20 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
}
else
{
- vcache->base.run = vcache_run;
+ vcache->base.run = vcache_check_run;
}
vcache->input_prim = prim;
- vcache->output_prim = reduced_prim[prim];
+ vcache->output_prim = draw_pt_reduced_prim(prim);
vcache->middle = middle;
- middle->prepare( middle, vcache->output_prim, opt );
+ vcache->opt = opt;
+
+ /* Have to run prepare here, but try and guess a good prim for
+ * doing so:
+ */
+ vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim;
+ middle->prepare( middle, vcache->middle_prim, opt, &vcache->fetch_max );
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
index cf9f394aa3..ec05bbeab4 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -10,7 +10,10 @@ static void FUNC( struct draw_pt_front_end *frontend,
boolean flatfirst = (draw->rasterizer->flatshade &&
draw->rasterizer->flatshade_first);
- unsigned i, flags;
+ unsigned i;
+ ushort flags;
+
+ if (0) debug_printf("%s %d\n", __FUNCTION__, count);
switch (vcache->input_prim) {
@@ -138,9 +141,9 @@ static void FUNC( struct draw_pt_front_end *frontend,
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/
- const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index 6d8bac5138..16c65c4317 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -109,4 +109,25 @@ extern void draw_compute_vertex_size(struct vertex_info *vinfo);
void draw_dump_emitted_vertex(const struct vertex_info *vinfo,
const uint8_t *data);
+
+/**
+ * Map an EMIT_x vertex attribute format onto the matching PIPE_FORMAT_x
+ * value.  Formats not listed here map to PIPE_FORMAT_NONE.
+ */
+static INLINE unsigned draw_translate_vinfo_format(unsigned format )
+{
+   unsigned pipe_format = PIPE_FORMAT_NONE;
+
+   switch (format) {
+   case EMIT_1F:
+   case EMIT_1F_PSIZE:
+      pipe_format = PIPE_FORMAT_R32_FLOAT;
+      break;
+   case EMIT_2F:
+      pipe_format = PIPE_FORMAT_R32G32_FLOAT;
+      break;
+   case EMIT_3F:
+      pipe_format = PIPE_FORMAT_R32G32B32_FLOAT;
+      break;
+   case EMIT_4F:
+      pipe_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      break;
+   case EMIT_4UB:
+      pipe_format = PIPE_FORMAT_R8G8B8A8_UNORM;
+      break;
+   default:
+      break;
+   }
+
+   return pipe_format;
+}
+
+
#endif /* DRAW_VERTEX_H */
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 03fe00a951..978954e91c 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -36,6 +36,38 @@
#include "draw_private.h"
#include "draw_context.h"
#include "draw_vs.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+
+
+
+/**
+ * Install the vertex shader constants.
+ *
+ * If the caller's buffer is not 16-byte aligned, its contents are
+ * copied into an aligned buffer owned by draw->vs, and that copy is
+ * used instead.
+ *
+ * \param draw       draw context
+ * \param constants  array of float[4] constants
+ * \param size       size of the constant data in bytes (it is passed
+ *                   straight to align_malloc() and memcpy())
+ */
+void draw_vs_set_constants( struct draw_context *draw,
+                            const float (*constants)[4],
+                            unsigned size )
+{
+   /* NOTE(review): the pointer-to-unsigned cast only needs the low four
+    * bits, but it narrows the pointer on 64-bit targets.
+    */
+   if (((unsigned)constants) & 0xf) {
+      if (size > draw->vs.const_storage_size) {
+         if (draw->vs.aligned_constant_storage)
+            align_free((void *)draw->vs.aligned_constant_storage);
+         draw->vs.aligned_constant_storage = align_malloc( size, 16 );
+
+         /* Remember the capacity so later calls of the same or smaller
+          * size reuse the buffer instead of reallocating every time.
+          */
+         draw->vs.const_storage_size =
+            draw->vs.aligned_constant_storage ? size : 0;
+      }
+
+      /* Skip the copy when the allocation failed rather than writing
+       * through a NULL pointer.
+       */
+      if (draw->vs.aligned_constant_storage) {
+         memcpy( (void*)draw->vs.aligned_constant_storage,
+                 constants,
+                 size );
+      }
+      constants = draw->vs.aligned_constant_storage;
+   }
+
+   draw->vs.aligned_constants = constants;
+   draw_vs_aos_machine_constants( draw->vs.aos_machine, constants );
+}
+
+
+/**
+ * Forward the viewport state to the AOS machine held in draw->vs.
+ */
+void draw_vs_set_viewport( struct draw_context *draw,
+ const struct pipe_viewport_state *viewport )
+{
+ draw_vs_aos_machine_viewport( draw->vs.aos_machine, viewport );
+}
@@ -53,6 +85,16 @@ draw_create_vertex_shader(struct draw_context *draw,
}
}
+ if (vs)
+ {
+ uint i;
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
+ vs->info.output_semantic_index[i] == 0)
+ vs->position_output = i;
+ }
+ }
+
assert(vs);
return vs;
}
@@ -66,13 +108,14 @@ draw_bind_vertex_shader(struct draw_context *draw,
if (dvs)
{
- draw->vertex_shader = dvs;
- draw->num_vs_outputs = dvs->info.num_outputs;
+ draw->vs.vertex_shader = dvs;
+ draw->vs.num_vs_outputs = dvs->info.num_outputs;
+ draw->vs.position_output = dvs->position_output;
dvs->prepare( dvs, draw );
}
else {
- draw->vertex_shader = NULL;
- draw->num_vs_outputs = 0;
+ draw->vs.vertex_shader = NULL;
+ draw->vs.num_vs_outputs = 0;
}
}
@@ -81,5 +124,135 @@ void
draw_delete_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs)
{
+ unsigned i;
+
+ for (i = 0; i < dvs->nr_varients; i++)
+ dvs->varient[i]->destroy( dvs->varient[i] );
+
+ dvs->nr_varients = 0;
+
dvs->delete( dvs );
}
+
+
+
+/**
+ * Set up the vertex shader state in draw->vs: the TGSI exec machine and
+ * its input/output storage, the emit and fetch translate caches, and
+ * the AOS machine.
+ *
+ * \return TRUE on success, FALSE as soon as one allocation fails.
+ *         Nothing is released here on failure; objects created before
+ *         the failing step stay attached to draw->vs.
+ */
+boolean
+draw_vs_init( struct draw_context *draw )
+{
+   tgsi_exec_machine_init(&draw->vs.machine);
+
+   /* FIXME: give this machine thing a proper constructor:
+    */
+   draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+   if (draw->vs.machine.Inputs == NULL)
+      goto fail;
+
+   draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+   if (draw->vs.machine.Outputs == NULL)
+      goto fail;
+
+   draw->vs.emit_cache = translate_cache_create();
+   if (draw->vs.emit_cache == NULL)
+      goto fail;
+
+   draw->vs.fetch_cache = translate_cache_create();
+   if (draw->vs.fetch_cache == NULL)
+      goto fail;
+
+   draw->vs.aos_machine = draw_vs_aos_machine();
+   if (draw->vs.aos_machine == NULL)
+      goto fail;
+
+   return TRUE;
+
+fail:
+   return FALSE;
+}
+
+/**
+ * Release the objects hanging off draw->vs.  Each pointer is tested
+ * first, so members that were never created are simply skipped.
+ */
+void
+draw_vs_destroy( struct draw_context *draw )
+{
+   if (draw->vs.machine.Inputs != NULL) {
+      align_free(draw->vs.machine.Inputs);
+   }
+
+   if (draw->vs.machine.Outputs != NULL) {
+      align_free(draw->vs.machine.Outputs);
+   }
+
+   if (draw->vs.fetch_cache != NULL) {
+      translate_cache_destroy(draw->vs.fetch_cache);
+   }
+
+   if (draw->vs.emit_cache != NULL) {
+      translate_cache_destroy(draw->vs.emit_cache);
+   }
+
+   if (draw->vs.aos_machine != NULL) {
+      draw_vs_aos_machine_destroy(draw->vs.aos_machine);
+   }
+
+   /* Aligned copy of the constants, if one was ever allocated. */
+   if (draw->vs.aligned_constant_storage != NULL) {
+      align_free((void*)draw->vs.aligned_constant_storage);
+   }
+
+   tgsi_exec_machine_free_data(&draw->vs.machine);
+}
+
+
+/**
+ * Find the varient of vs matching key, creating one when none is
+ * cached yet.
+ *
+ * New varients fill the vs->varient[] array until it is full; after
+ * that, slots are recycled round-robin via vs->last_varient and the
+ * varient previously stored in the chosen slot is destroyed.
+ *
+ * \return the matching varient, or NULL if creation failed.
+ */
+struct draw_vs_varient *
+draw_vs_lookup_varient( struct draw_vertex_shader *vs,
+                        const struct draw_vs_varient_key *key )
+{
+   struct draw_vs_varient *created;
+   unsigned slot;
+
+   /* Already cached? */
+   for (slot = 0; slot < vs->nr_varients; slot++) {
+      struct draw_vs_varient *candidate = vs->varient[slot];
+
+      if (draw_vs_varient_key_compare(key, &candidate->key) == 0)
+         return candidate;
+   }
+
+   /* No match: build a new one. */
+   created = vs->create_varient( vs, key );
+   if (created == NULL)
+      return NULL;
+
+   /* Pick the slot: append while there is room, otherwise evict. */
+   if (vs->nr_varients < Elements(vs->varient)) {
+      slot = vs->nr_varients++;
+   }
+   else {
+      vs->last_varient = (vs->last_varient + 1) % Elements(vs->varient);
+      slot = vs->last_varient;
+      vs->varient[slot]->destroy(vs->varient[slot]);
+   }
+
+   vs->varient[slot] = created;
+   return created;
+}
+
+
+/**
+ * Return a fetch translate object for key.  The most recently used one
+ * is kept in draw->vs.fetch and returned directly when its key
+ * matches; otherwise key is sanitized and looked up in the fetch cache.
+ */
+struct translate *
+draw_vs_get_fetch( struct draw_context *draw,
+                   struct translate_key *key )
+{
+   if (draw->vs.fetch &&
+       translate_key_compare(&draw->vs.fetch->key, key) == 0)
+      return draw->vs.fetch;
+
+   translate_key_sanitize(key);
+   draw->vs.fetch = translate_cache_find(draw->vs.fetch_cache, key);
+
+   return draw->vs.fetch;
+}
+
+/**
+ * Return an emit translate object for key.  The most recently used one
+ * is kept in draw->vs.emit and returned directly when its key matches;
+ * otherwise key is sanitized and looked up in the emit cache.
+ */
+struct translate *
+draw_vs_get_emit( struct draw_context *draw,
+                  struct translate_key *key )
+{
+   if (draw->vs.emit &&
+       translate_key_compare(&draw->vs.emit->key, key) == 0)
+      return draw->vs.emit;
+
+   translate_key_sanitize(key);
+   draw->vs.emit = translate_cache_find(draw->vs.emit_cache, key);
+
+   return draw->vs.emit;
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index f9772b83b8..45992d1986 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -38,16 +38,88 @@
struct draw_context;
struct pipe_shader_state;
+/* Input half of a varient element.
+ * NOTE(review): buffer/offset presumably name a vertex buffer and an
+ * offset into it -- confirm against the code that fills this in.
+ */
+struct draw_varient_input
+{
+ enum pipe_format format;
+ unsigned buffer;
+ unsigned offset;
+};
+
+/* Output half of a varient element.  vs_output and offset are packed
+ * into one 32-bit word as 8-bit and 24-bit fields.
+ */
+struct draw_varient_output
+{
+ enum pipe_format format; /* output format */
+ unsigned vs_output:8; /* which vertex shader output is this? */
+ unsigned offset:24; /* offset into output vertex */
+};
+
+/* One slot of a varient key: an input description paired with an
+ * output description.
+ */
+struct draw_varient_element {
+ struct draw_varient_input in;
+ struct draw_varient_output out;
+};
+
+/* Key identifying a shader varient.  Keys are compared with memcmp()
+ * over a prefix whose length is given by draw_vs_varient_keysize(),
+ * which counts the fields ahead of element[] as 2 * sizeof(int); keep
+ * that function in step with any change to the layout here.
+ */
+struct draw_vs_varient_key {
+ unsigned output_stride;
+ unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */
+ unsigned nr_inputs:8;
+ unsigned nr_outputs:8;
+ unsigned viewport:1;
+ unsigned clip:1;
+ unsigned pad:5;
+ struct draw_varient_element element[PIPE_MAX_ATTRIBS];
+};
+
+struct draw_vs_varient;
+
+
+/* A vertex shader specialised for one draw_vs_varient_key, exposed as
+ * a small table of function pointers.
+ */
+struct draw_vs_varient {
+ struct draw_vs_varient_key key;
+
+ struct draw_vertex_shader *vs;
+
+ /* NOTE(review): presumably binds the memory and stride that input i
+ * is read from -- confirm against an implementation.
+ */
+ void (*set_input)( struct draw_vs_varient *,
+ unsigned i,
+ const void *ptr,
+ unsigned stride );
+
+ /* Run over a range given by start and count. */
+ void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
+ unsigned start,
+ unsigned count,
+ void *output_buffer );
+
+ /* Run over an explicit list of count elements. */
+ void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer );
+
+ void (*destroy)( struct draw_vs_varient * );
+};
+
+
/**
* Private version of the compiled vertex_shader
*/
struct draw_vertex_shader {
+ struct draw_context *draw;
/* This member will disappear shortly:
*/
struct pipe_shader_state state;
struct tgsi_shader_info info;
+ unsigned position_output;
+
+ /* Extracted from shader:
+ */
+ const float (*immediates)[4];
+
+ /*
+ */
+ struct draw_vs_varient *varient[16];
+ unsigned nr_varients;
+ unsigned last_varient;
+ struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader,
+ const struct draw_vs_varient_key *key );
+
void (*prepare)( struct draw_vertex_shader *shader,
struct draw_context *draw );
@@ -68,6 +140,15 @@ struct draw_vertex_shader {
};
+struct draw_vs_varient *
+draw_vs_lookup_varient( struct draw_vertex_shader *base,
+ const struct draw_vs_varient_key *key );
+
+
+/********************************************************************************
+ * Internal functions:
+ */
+
struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
const struct pipe_shader_state *templ);
@@ -81,7 +162,59 @@ draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
+
+struct draw_vs_varient_key;
+struct draw_vertex_shader;
+
+struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
+
+
+
+/********************************************************************************
+ * Helpers for vs implementations that don't do their own fetch/emit varients.
+ * Means these can be shared between shaders.
+ */
+struct translate;
+struct translate_key;
+
+struct translate *draw_vs_get_fetch( struct draw_context *draw,
+ struct translate_key *key );
+
+
+struct translate *draw_vs_get_emit( struct draw_context *draw,
+ struct translate_key *key );
+
+struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
+
+
+
+/* Number of leading bytes of a key that are significant: the two
+ * int-sized words ahead of element[] plus the nr_elements entries in
+ * use.
+ */
+static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key )
+{
+   const unsigned header_bytes = 2 * sizeof(int);
+   const unsigned element_bytes =
+      key->nr_elements * sizeof(struct draw_varient_element);
+
+   return header_bytes + element_bytes;
+}
+
+/* memcmp()-style comparison of two keys; returns 0 when they match.
+ * Only the significant prefix of a (as sized by
+ * draw_vs_varient_keysize()) is compared.
+ */
+static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a,
+                                               const struct draw_vs_varient_key *b )
+{
+   return memcmp(a, b, draw_vs_varient_keysize(a));
+}
+
+
+struct aos_machine *draw_vs_aos_machine( void );
+void draw_vs_aos_machine_destroy( struct aos_machine *machine );
+
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+ const float (*constants)[4] );
+
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+ const struct pipe_viewport_state *viewport );
+
+
#define MAX_TGSI_VERTICES 4
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
new file mode 100644
index 0000000000..1f926b3e85
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -0,0 +1,2138 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 6.3
+ *
+ * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Translate tgsi vertex programs to x86/x87/SSE/SSE2 machine code
+ * using the rtasm runtime assembler. Based on the old
+ * t_vb_arb_program_sse.c
+ */
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/util/tgsi_dump.h"
+
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+#ifdef PIPE_ARCH_X86
+#define DISASSEM 0
+
+/* Printable names for register files, indexed by the file number
+ * stored in cp->xmm[].file (used by the debug print in spill()).
+ */
+static const char *files[] =
+{
+ "NULL",
+ "CONST",
+ "IN",
+ "OUT",
+ "TEMP",
+ "SAMP",
+ "ADDR",
+ "IMM",
+ "INTERNAL",
+};
+
+/* True when two x86_reg descriptions agree in file, index, addressing
+ * mode and displacement.
+ */
+static INLINE boolean eq( struct x86_reg a,
+                          struct x86_reg b )
+{
+   boolean same = (a.file == b.file);
+
+   same = same && (a.idx == b.idx);
+   same = same && (a.mod == b.mod);
+   same = same && (a.disp == b.disp);
+
+   return same;
+}
+
+/* Return the x86 register selected by which_reg (0 -> cp->temp_EBP,
+ * anything else -> cp->tmp_EAX).  Unless cp->x86_reg[which_reg] already
+ * records value, a mov is emitted that loads the register from the
+ * aos_machine field chosen by value, and the record is updated.
+ */
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+ unsigned which_reg, /* quick hack */
+ unsigned value )
+{
+ struct x86_reg reg;
+
+ if (which_reg == 0)
+ reg = cp->temp_EBP;
+ else
+ reg = cp->tmp_EAX;
+
+ if (cp->x86_reg[which_reg] != value) {
+ unsigned offset;
+
+ /* The asserts tie each value to one register: immediates and
+ * attribs to register 0, constants to register 1.
+ */
+ switch (value) {
+ case X86_IMMEDIATES:
+ assert(which_reg == 0);
+ offset = Offset(struct aos_machine, immediates);
+ break;
+ case X86_CONSTANTS:
+ assert(which_reg == 1);
+ offset = Offset(struct aos_machine, constants);
+ break;
+ case X86_ATTRIBS:
+ assert(which_reg == 0);
+ offset = Offset(struct aos_machine, attrib);
+ break;
+ default:
+ assert(0);
+ offset = 0;
+ }
+
+
+ /* Load the field at that offset of the machine struct (addressed
+ * through cp->machine_EDX) into the chosen register.
+ */
+ x86_mov(cp->func, reg,
+ x86_make_disp(cp->machine_EDX, offset));
+
+ cp->x86_reg[which_reg] = value;
+ }
+
+ return reg;
+}
+
+
+/* Build the memory operand for shader register file[idx].  Inputs,
+ * outputs, temporaries and internal registers are addressed as fields
+ * of struct aos_machine relative to cp->machine_EDX.  Immediates and
+ * constants are addressed through a base register obtained from
+ * aos_get_x86(), which may emit a load of that base.
+ */
+static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ struct x86_reg ptr = cp->machine_EDX;
+
+ switch (file) {
+ case TGSI_FILE_INPUT:
+ return x86_make_disp(ptr, Offset(struct aos_machine, input[idx]));
+
+ case TGSI_FILE_OUTPUT:
+ return x86_make_disp(ptr, Offset(struct aos_machine, output[idx]));
+
+ case TGSI_FILE_TEMPORARY:
+ return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
+
+ case AOS_FILE_INTERNAL:
+ return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
+
+ /* Each immediate/constant slot is four floats wide. */
+ case TGSI_FILE_IMMEDIATE:
+ return x86_make_disp(aos_get_x86(cp, 0, X86_IMMEDIATES), idx * 4 * sizeof(float));
+
+ case TGSI_FILE_CONSTANT:
+ return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float));
+
+ default:
+ ERROR(cp, "unknown reg file");
+ return x86_make_reg(0,0);
+ }
+}
+
+
+
+#define X87_CW_EXCEPTION_INV_OP (1<<0)
+#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
+#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
+#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
+#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
+#define X87_CW_EXCEPTION_PRECISION (1<<5)
+#define X87_CW_PRECISION_SINGLE (0<<8)
+#define X87_CW_PRECISION_RESERVED (1<<8)
+#define X87_CW_PRECISION_DOUBLE (2<<8)
+#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
+#define X87_CW_PRECISION_MASK (3<<8)
+#define X87_CW_ROUND_NEAREST (0<<10)
+#define X87_CW_ROUND_DOWN (1<<10)
+#define X87_CW_ROUND_UP (2<<10)
+#define X87_CW_ROUND_ZERO (3<<10)
+#define X87_CW_ROUND_MASK (3<<10)
+#define X87_CW_INFINITY (1<<12)
+
+
+
+
+/* Write xmm register idx back to the memory slot of the shader
+ * register it currently holds and clear its dirty flag.  Reports
+ * "invalid spill" and does nothing if the register is not dirty or
+ * does not hold an input, output or temporary.
+ */
+static void spill( struct aos_compilation *cp, unsigned idx )
+{
+ if (!cp->xmm[idx].dirty ||
+ (cp->xmm[idx].file != TGSI_FILE_INPUT && /* inputs are fetched into xmm & set dirty */
+ cp->xmm[idx].file != TGSI_FILE_OUTPUT &&
+ cp->xmm[idx].file != TGSI_FILE_TEMPORARY)) {
+ ERROR(cp, "invalid spill");
+ return;
+ }
+ else {
+ struct x86_reg oldval = get_reg_ptr(cp,
+ cp->xmm[idx].file,
+ cp->xmm[idx].idx);
+
+ if (0) debug_printf("\nspill %s[%d]",
+ files[cp->xmm[idx].file],
+ cp->xmm[idx].idx);
+
+ assert(cp->xmm[idx].dirty);
+ /* Store the xmm register to its memory slot. */
+ sse_movaps(cp->func, oldval, x86_make_reg(file_XMM, idx));
+ cp->xmm[idx].dirty = 0;
+ }
+}
+
+
+/* Return an xmm register holding the value of reg that the caller may
+ * overwrite.  reg itself is reused only if it is an xmm register not
+ * bound to any shader register; otherwise its value is copied into a
+ * freshly allocated xmm register.
+ */
+static struct x86_reg get_xmm_writable( struct aos_compilation *cp,
+                                        struct x86_reg reg )
+{
+   const boolean need_copy = (reg.file != file_XMM ||
+                              cp->xmm[reg.idx].file != TGSI_FILE_NULL);
+
+   if (need_copy) {
+      struct x86_reg copy = aos_get_xmm_reg(cp);
+
+      sse_movaps(cp->func, copy, reg);
+      reg = copy;
+   }
+
+   /* Stamp the register as used by the current instruction. */
+   cp->xmm[reg.idx].last_used = cp->insn_counter;
+   return reg;
+}
+
+/* Return an xmm register holding the value of reg.  An operand that is
+ * already an xmm register is returned as is; anything else is loaded
+ * into a newly allocated xmm register.
+ */
+static struct x86_reg get_xmm( struct aos_compilation *cp,
+                               struct x86_reg reg )
+{
+   struct x86_reg result = reg;
+
+   if (reg.file != file_XMM) {
+      result = aos_get_xmm_reg(cp);
+      sse_movaps(cp->func, result, reg);
+   }
+
+   /* Stamp the register as used by the current instruction. */
+   cp->xmm[result.idx].last_used = cp->insn_counter;
+   return result;
+}
+
+
+/* Allocate an empty xmm register, either as a temporary or later to
+ * "adopt" as a shader reg.
+ *
+ * A register that is unbound (TGSI_FILE_NULL) and was not used by the
+ * current instruction is preferred.  Failing that, the register with
+ * the smallest last_used stamp is taken, and spilled first if dirty.
+ */
+struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp )
+{
+ unsigned i;
+ unsigned oldest = 0;
+ boolean found = FALSE;
+
+ /* No break: when several registers qualify, the highest-numbered
+ * one wins.
+ */
+ for (i = 0; i < 8; i++)
+ if (cp->xmm[i].last_used != cp->insn_counter &&
+ cp->xmm[i].file == TGSI_FILE_NULL) {
+ oldest = i;
+ found = TRUE;
+ }
+
+ if (!found) {
+ for (i = 0; i < 8; i++)
+ if (cp->xmm[i].last_used < cp->xmm[oldest].last_used)
+ oldest = i;
+ }
+
+ /* Need to write out the old value?
+ */
+ if (cp->xmm[oldest].dirty)
+ spill(cp, oldest);
+
+ /* Fires if every register was already used by this instruction. */
+ assert(cp->xmm[oldest].last_used != cp->insn_counter);
+
+ cp->xmm[oldest].file = TGSI_FILE_NULL;
+ cp->xmm[oldest].idx = 0;
+ cp->xmm[oldest].dirty = 0;
+ cp->xmm[oldest].last_used = cp->insn_counter;
+ return x86_make_reg(file_XMM, oldest);
+}
+
+/* Reset the bookkeeping for xmm register idx: unbound, clean and never
+ * used.  No code is emitted, so a dirty value is dropped, not spilled.
+ */
+void aos_release_xmm_reg( struct aos_compilation *cp,
+                          unsigned idx )
+{
+   cp->xmm[idx].last_used = 0;
+   cp->xmm[idx].dirty = 0;
+   cp->xmm[idx].idx = 0;
+   cp->xmm[idx].file = TGSI_FILE_NULL;
+}
+
+
+
+
+ /* Record that xmm register 'reg' now holds the current value of shader
+  * register file[idx].
+  *
+  * Any xmm register previously tracking that shader reg is released, and
+  * its dirty flag is folded into 'dirty' so a pending write-back is not
+  * lost.  A non-XMM 'reg' is a caller error (asserts, then ignored).
+  */
+ void aos_adopt_xmm_reg( struct aos_compilation *cp,
+                         struct x86_reg reg,
+                         unsigned file,
+                         unsigned idx,
+                         unsigned dirty )
+ {
+    unsigned slot;
+
+    if (reg.file != file_XMM) {
+       assert(0);
+       return;
+    }
+
+    /* Drop every stale mapping of this shader reg, keeping its dirtiness. */
+    for (slot = 0; slot < 8; slot++) {
+       if (cp->xmm[slot].file != file ||
+           cp->xmm[slot].idx != idx)
+          continue;
+
+       dirty |= cp->xmm[slot].dirty;
+       aos_release_xmm_reg(cp, slot);
+    }
+
+    cp->xmm[reg.idx].last_used = cp->insn_counter;
+    cp->xmm[reg.idx].dirty = dirty;
+    cp->xmm[reg.idx].idx = idx;
+    cp->xmm[reg.idx].file = file;
+ }
+
+
+ /* Return the memory operand for shader register file[idx], first writing
+  * back any dirty xmm copy so that memory holds the current value.  The
+  * xmm copy stays resident (it is only made clean).
+  */
+ static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp,
+                                               unsigned file,
+                                               unsigned idx )
+ {
+    unsigned slot;
+
+    for (slot = 0; slot < 8; slot++) {
+       if (cp->xmm[slot].file != file)
+          continue;
+       if (cp->xmm[slot].idx != idx)
+          continue;
+
+       if (cp->xmm[slot].dirty)
+          spill(cp, slot);
+    }
+
+    return get_reg_ptr( cp, file, idx );
+ }
+
+
+ /* Return the memory operand for a destination register that is about to
+  * be written directly in memory.
+  *
+  * Every xmm copy of the register is written back if dirty and then
+  * released, so no stale xmm copy survives the memory write.  Note - this
+  * pointer may alias those returned for source arguments.
+  */
+ static struct x86_reg get_dst_ptr( struct aos_compilation *cp,
+                                    const struct tgsi_full_dst_register *dst )
+ {
+    const unsigned file = dst->DstRegister.File;
+    const unsigned idx = dst->DstRegister.Index;
+    unsigned slot;
+
+    for (slot = 0; slot < 8; slot++) {
+       if (cp->xmm[slot].file != file ||
+           cp->xmm[slot].idx != idx)
+          continue;
+
+       if (cp->xmm[slot].dirty)
+          spill(cp, slot);
+
+       aos_release_xmm_reg(cp, slot);
+    }
+
+    return get_reg_ptr( cp, file, idx );
+ }
+
+
+
+
+
+ /* Locate shader register file[idx].
+  *
+  * If an xmm register is tracking it, that register is returned (and
+  * stamped as used by the current instruction); otherwise the result is
+  * the memory operand of the saved value.
+  */
+ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
+                                    unsigned file,
+                                    unsigned idx )
+ {
+    unsigned slot = 0;
+
+    while (slot < 8) {
+       if (cp->xmm[slot].file == file &&
+           cp->xmm[slot].idx == idx) {
+          cp->xmm[slot].last_used = cp->insn_counter;
+          return x86_make_reg(file_XMM, slot);
+       }
+       slot++;
+    }
+
+    /* Not resident: hand back an indirect reference to the in-memory copy. */
+    return get_reg_ptr( cp, file, idx );
+ }
+
+
+
+ /* Return shader register file[idx] in an xmm register, loading it from
+  * memory if it is not resident, and record the register as tracking that
+  * shader reg.  A freshly loaded copy is clean; an already-dirty resident
+  * copy stays dirty (aos_adopt_xmm_reg merges the flag).
+  */
+ static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp,
+                                               unsigned file,
+                                               unsigned idx )
+ {
+    struct x86_reg where = aos_get_shader_reg( cp, file, idx );
+    struct x86_reg reg = get_xmm( cp, where );
+
+    aos_adopt_xmm_reg( cp, reg, file, idx, FALSE );
+    return reg;
+ }
+
+
+
+ /* Return internal constant 'imm' (an AOS_FILE_INTERNAL register) in an
+ * xmm register, loading and tracking it if necessary.
+ */
+ struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
+ unsigned imm )
+ {
+ return aos_get_shader_reg_xmm( cp, AOS_FILE_INTERNAL, imm );
+ }
+
+
+ /* Return internal constant 'imm' (an AOS_FILE_INTERNAL register) as
+ * either its resident xmm register or its memory operand.
+ */
+ struct x86_reg aos_get_internal( struct aos_compilation *cp,
+ unsigned imm )
+ {
+ return aos_get_shader_reg( cp, AOS_FILE_INTERNAL, imm );
+ }
+
+
+
+
+
+ /* Emit dst = shuffle(arg0, shuf).
+  *
+  * Uses pshufd when SSE2 is available; otherwise emulates it in plain SSE
+  * by copying arg0 into dst (unless they are the same operand) and
+  * shuffling dst with itself.
+  */
+ static void emit_pshufd( struct aos_compilation *cp,
+                          struct x86_reg dst,
+                          struct x86_reg arg0,
+                          ubyte shuf )
+ {
+    if (cp->have_sse2) {
+       sse2_pshufd(cp->func, dst, arg0, shuf);
+       return;
+    }
+
+    if (!eq(dst, arg0))
+       sse_movaps(cp->func, dst, arg0);
+
+    sse_shufps(cp->func, dst, dst, shuf);
+ }
+
+ /* Merge 'result' into 'dst' under a write mask, for masks that have no
+ * cheaper special case:
+ *
+ * load masks (pack into negs??)
+ * pshufd - shuffle according to writemask
+ * and - result, mask
+ * nand - dest, mask
+ * or - dest, result
+ *
+ * The per-channel bit mask is built from elements 2 and 3 of IMM_SWZ
+ * (element 2 for written channels, element 3 for preserved ones) --
+ * presumably all-zeros and all-ones respectively; confirm against the
+ * table that initialises IMM_SWZ.  Always returns TRUE.
+ */
+ static boolean mask_write( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg result,
+ unsigned mask )
+ {
+ struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ emit_pshufd(cp, tmp, imm_swz,
+ SHUF((mask & 1) ? 2 : 3,
+ (mask & 2) ? 2 : 3,
+ (mask & 4) ? 2 : 3,
+ (mask & 8) ? 2 : 3));
+
+ sse_andps(cp->func, dst, tmp); /* keep the channels that are not written */
+ sse_andnps(cp->func, tmp, result); /* tmp = ~tmp & result: the written channels */
+ sse_orps(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ return TRUE;
+ }
+
+
+
+
+ /* Helper for writemask:
+ *
+ * Builds dst from two sources: after applying 'shuf' to both, the low two
+ * elements come from arg1 and the high two from arg0; 'shuf' is then
+ * applied once more to the combined value.  Uses one scratch xmm register.
+ * Always returns TRUE.
+ */
+ static boolean emit_shuf_copy2( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg arg0,
+ struct x86_reg arg1,
+ ubyte shuf )
+ {
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ emit_pshufd(cp, dst, arg1, shuf);
+ emit_pshufd(cp, tmp, arg0, shuf);
+ sse_shufps(cp->func, dst, tmp, SHUF(X, Y, Z, W)); /* dst.xy, tmp.zw */
+ emit_pshufd(cp, dst, dst, shuf);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ return TRUE;
+ }
+
+
+
+ /* Shuffle immediate selecting elements 0,1,2,3 in order, i.e. the identity. */
+ #define SSE_SWIZZLE_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6))
+
+
+ /* Locate a source register and perform any required (simple) swizzle.
+ *
+ * Just fail on complex swizzles at this point.
+ *
+ * Returns the register's own location (xmm or memory) when no swizzle,
+ * negate or abs is requested; otherwise returns a newly allocated xmm
+ * register holding the modified value.  Extended ZERO/ONE swizzles,
+ * partial abs and sign modes other than toggle/keep/clear are reported
+ * through ERROR().
+ */
+ static struct x86_reg fetch_src( struct aos_compilation *cp,
+ const struct tgsi_full_src_register *src )
+ {
+ struct x86_reg arg0 = aos_get_shader_reg(cp,
+ src->SrcRegister.File,
+ src->SrcRegister.Index);
+ unsigned i;
+ ubyte swz = 0;
+ unsigned negs = 0;
+ unsigned abs = 0;
+
+ /* Collect the shuffle immediate and per-channel negate/abs bit masks. */
+ for (i = 0; i < 4; i++) {
+ unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, i );
+ unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, i );
+
+ switch (swizzle) {
+ case TGSI_EXTSWIZZLE_ZERO:
+ case TGSI_EXTSWIZZLE_ONE:
+ ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2");
+ break;
+
+ default:
+ swz |= (swizzle & 0x3) << (i * 2);
+ break;
+ }
+
+ switch (neg) {
+ case TGSI_UTIL_SIGN_TOGGLE:
+ negs |= (1<<i);
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+
+ case TGSI_UTIL_SIGN_CLEAR:
+ abs |= (1<<i);
+ break;
+
+ default:
+ ERROR(cp, "unsupported sign-mode");
+ break;
+ }
+ }
+
+ if (swz != SSE_SWIZZLE_NOOP || negs != 0 || abs != 0) {
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+
+ if (swz != SSE_SWIZZLE_NOOP)
+ emit_pshufd(cp, dst, arg0, swz);
+ else
+ sse_movaps(cp->func, dst, arg0);
+
+ /* Partial negate: multiply by a vector picked from IMM_SWZ. */
+ if (negs && negs != 0xf) {
+ struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ /* Load 1,-1,0,0
+ * Use neg as arg to pshufd
+ * Multiply
+ */
+ emit_pshufd(cp, tmp, imm_swz,
+ SHUF((negs & 1) ? 1 : 0,
+ (negs & 2) ? 1 : 0,
+ (negs & 4) ? 1 : 0,
+ (negs & 8) ? 1 : 0));
+ sse_mulps(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ }
+ /* All four channels negated: multiply by IMM_NEGS. */
+ else if (negs) {
+ struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS);
+ sse_mulps(cp->func, dst, imm_negs);
+ }
+
+
+ if (abs && abs != 0xf) {
+ ERROR(cp, "unsupported partial abs");
+ }
+ /* Full abs: dst = max(dst, dst * IMM_NEGS). */
+ else if (abs) {
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movaps(cp->func, tmp, dst);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ }
+
+ return dst;
+ }
+
+ return arg0;
+ }
+
+ /* Push one channel of a source register onto the x87 stack, applying its
+ * extended swizzle (including the ZERO and ONE constants) and its sign
+ * mode.  The register is read from memory, so any dirty xmm copy is
+ * written back first.
+ */
+ static void x87_fld_src( struct aos_compilation *cp,
+ const struct tgsi_full_src_register *src,
+ unsigned channel )
+ {
+ struct x86_reg arg0 = aos_get_shader_reg_ptr(cp,
+ src->SrcRegister.File,
+ src->SrcRegister.Index);
+
+ unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, channel );
+ unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel );
+
+ switch (swizzle) {
+ case TGSI_EXTSWIZZLE_ZERO:
+ x87_fldz( cp->func );
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ x87_fld1( cp->func );
+ break;
+
+ default:
+ /* Load the selected float from the in-memory copy. */
+ x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) );
+ break;
+ }
+
+
+ switch (neg) {
+ case TGSI_UTIL_SIGN_TOGGLE:
+ /* Flip the sign:
+ */
+ x87_fchs( cp->func );
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+
+ case TGSI_UTIL_SIGN_CLEAR:
+ x87_fabs( cp->func );
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ /* Force negative: -|x| */
+ x87_fabs( cp->func );
+ x87_fchs( cp->func );
+ break;
+
+ default:
+ ERROR(cp, "unsupported sign-mode");
+ break;
+ }
+ }
+
+
+
+
+
+
+ /* Used to implement write masking. This and most of the other instructions
+ * here would be easier to implement if there had been a translation
+ * to a 2 argument format (dst/arg0, arg1) at the shader level before
+ * attempting to translate to x86/sse code.
+ *
+ * Stores the four-channel 'result' into destination 'reg', honouring the
+ * write mask.  Nothing is written to memory here: the xmm register that
+ * ends up holding the merged value is adopted as the (dirty) copy of the
+ * destination.
+ */
+ static void store_dest( struct aos_compilation *cp,
+ const struct tgsi_full_dst_register *reg,
+ struct x86_reg result )
+ {
+ struct x86_reg dst;
+
+ switch (reg->DstRegister.WriteMask) {
+ case 0:
+ return;
+
+ case TGSI_WRITEMASK_XYZW:
+ /* Full write: no merge needed, the result simply becomes the dest. */
+ aos_adopt_xmm_reg(cp,
+ get_xmm_writable(cp, result),
+ reg->DstRegister.File,
+ reg->DstRegister.Index,
+ TRUE);
+ return;
+ default:
+ break;
+ }
+
+ /* Partial write: the old destination value is needed for the merge. */
+ dst = aos_get_shader_reg_xmm(cp,
+ reg->DstRegister.File,
+ reg->DstRegister.Index);
+
+ switch (reg->DstRegister.WriteMask) {
+ case TGSI_WRITEMASK_X:
+ sse_movss(cp->func, dst, get_xmm(cp, result));
+ break;
+
+ case TGSI_WRITEMASK_ZW:
+ sse_shufps(cp->func, dst, get_xmm(cp, result), SHUF(X, Y, Z, W));
+ break;
+
+ case TGSI_WRITEMASK_XY:
+ /* Merge into the result register instead and make it the dest. */
+ result = get_xmm_writable(cp, result);
+ sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W));
+ dst = result;
+ break;
+
+ case TGSI_WRITEMASK_YZW:
+ /* Keep only the old X channel; the rest comes from result. */
+ result = get_xmm_writable(cp, result);
+ sse_movss(cp->func, result, dst);
+ dst = result;
+ break;
+
+ default:
+ mask_write(cp, dst, result, reg->DstRegister.WriteMask);
+ break;
+ }
+
+ aos_adopt_xmm_reg(cp,
+ dst,
+ reg->DstRegister.File,
+ reg->DstRegister.Index,
+ TRUE);
+
+ }
+
+ /* Write the low element of 'result' into one channel of 'dst'.
+ *
+ * 'swizzle' is applied before and after the movss; callers pass a shuffle
+ * that exchanges X with the target channel, so the second application
+ * puts the channels back in order.
+ */
+ static void inject_scalar( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg result,
+ ubyte swizzle )
+ {
+ sse_shufps(cp->func, dst, dst, swizzle);
+ sse_movss(cp->func, dst, result);
+ sse_shufps(cp->func, dst, dst, swizzle);
+ }
+
+
+ /* Store a scalar result (the low element of 'result') into destination
+  * 'reg', honouring the write mask.
+  *
+  *  - empty mask: nothing is emitted, matching store_dest().
+  *  - single-channel mask: the scalar is injected into that channel of the
+  *    destination.
+  *  - any other mask: the scalar is broadcast to all four channels and
+  *    handed to store_dest() for the masked merge.
+  *
+  * As in store_dest(), the merged value is left in an xmm register that is
+  * adopted as the dirty copy of the destination.
+  */
+ static void store_scalar_dest( struct aos_compilation *cp,
+                                const struct tgsi_full_dst_register *reg,
+                                struct x86_reg result )
+ {
+    unsigned writemask = reg->DstRegister.WriteMask;
+    struct x86_reg dst;
+
+    /* Nothing to write.  Without this the destination would be loaded into
+     * an xmm register and marked dirty for no reason.
+     */
+    if (writemask == 0)
+       return;
+
+    if (writemask != TGSI_WRITEMASK_X &&
+        writemask != TGSI_WRITEMASK_Y &&
+        writemask != TGSI_WRITEMASK_Z &&
+        writemask != TGSI_WRITEMASK_W)
+    {
+       result = get_xmm_writable(cp, result); /* already true, right? */
+       sse_shufps(cp->func, result, result, SHUF(X,X,X,X));
+       store_dest(cp, reg, result);
+       return;
+    }
+
+    result = get_xmm(cp, result);
+    dst = aos_get_shader_reg_xmm(cp,
+                                 reg->DstRegister.File,
+                                 reg->DstRegister.Index);
+
+    switch (writemask) {
+    case TGSI_WRITEMASK_X:
+       sse_movss(cp->func, dst, result);
+       break;
+
+    case TGSI_WRITEMASK_Y:
+       inject_scalar(cp, dst, result, SHUF(Y, X, Z, W));
+       break;
+
+    case TGSI_WRITEMASK_Z:
+       inject_scalar(cp, dst, result, SHUF(Z, Y, X, W));
+       break;
+
+    case TGSI_WRITEMASK_W:
+       inject_scalar(cp, dst, result, SHUF(W, Y, Z, X));
+       break;
+
+    default:
+       break;
+    }
+
+    aos_adopt_xmm_reg(cp,
+                      dst,
+                      reg->DstRegister.File,
+                      reg->DstRegister.Index,
+                      TRUE);
+ }
+
+
+
+ /* Store st(0), without popping, to float 'channel' of the vector at
+  * 'ptr' -- but only when that channel is enabled in 'writemask'.
+  */
+ static void x87_fst_or_nop( struct x86_function *func,
+                             unsigned writemask,
+                             unsigned channel,
+                             struct x86_reg ptr )
+ {
+    assert(ptr.file == file_REG32);
+
+    if (!(writemask & (1<<channel)))
+       return;
+
+    x87_fst( func, x86_make_disp(ptr, channel * sizeof(float)) );
+ }
+
+ /* Pop st(0).  When 'channel' is enabled in 'writemask' the value is
+  * stored to that float of the vector at 'ptr'; otherwise it is discarded
+  * by storing it to st(0) itself.
+  */
+ static void x87_fstp_or_pop( struct x86_function *func,
+                              unsigned writemask,
+                              unsigned channel,
+                              struct x86_reg ptr )
+ {
+    struct x86_reg target;
+
+    assert(ptr.file == file_REG32);
+
+    if (writemask & (1<<channel))
+       target = x86_make_disp(ptr, channel * sizeof(float));
+    else
+       target = x86_make_reg( file_x87, 0 );
+
+    x87_fstp( func, target );
+ }
+
+
+
+/*
+ */
+static void x87_fstp_dest4( struct aos_compilation *cp,
+ const struct tgsi_full_dst_register *dst )
+{
+ struct x86_reg ptr = get_dst_ptr(cp, dst);
+ unsigned writemask = dst->DstRegister.WriteMask;
+
+ x87_fst_or_nop(cp->func, writemask, 0, ptr);
+ x87_fst_or_nop(cp->func, writemask, 1, ptr);
+ x87_fst_or_nop(cp->func, writemask, 2, ptr);
+ x87_fstp_or_pop(cp->func, writemask, 3, ptr);
+}
+
+ /* Save the current x87 control word into the machine's fpu_restore slot.
+ *
+ * NOTE(review): the previous comment also mentioned switching to single
+ * precision mode; this function only emits the fnstcw store.
+ */
+ static void save_fpu_state( struct aos_compilation *cp )
+ {
+ x87_fnstcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_restore)));
+ }
+
+ /* Clear pending x87 exceptions and reload the control word from the
+ * machine's fpu_restore slot.
+ */
+ static void restore_fpu_state( struct aos_compilation *cp )
+ {
+ x87_fnclex(cp->func);
+ x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_restore)));
+ }
+
+ /* Switch the x87 control word to the machine's fpu_rnd_neg_inf setting,
+  * unless the compile-time tracker says it is already active.
+  */
+ static void set_fpu_round_neg_inf( struct aos_compilation *cp )
+ {
+    if (cp->fpucntl == FPU_RND_NEG)
+       return;
+
+    cp->fpucntl = FPU_RND_NEG;
+    x87_fnclex(cp->func);
+    x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+                                        Offset(struct aos_machine, fpu_rnd_neg_inf)));
+ }
+
+ /* Switch the x87 control word to the machine's fpu_rnd_nearest setting,
+  * unless the compile-time tracker says it is already active.
+  */
+ static void set_fpu_round_nearest( struct aos_compilation *cp )
+ {
+    if (cp->fpucntl == FPU_RND_NEAREST)
+       return;
+
+    cp->fpucntl = FPU_RND_NEAREST;
+    x87_fnclex(cp->func);
+    x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+                                        Offset(struct aos_machine, fpu_rnd_nearest)));
+ }
+
+
+ /* Replace st(0) = a with 2^a.
+ *
+ * a is split into an integer part (frndint, using whatever rounding mode
+ * is current) and the remainder; f2xm1 handles the remainder and fscale
+ * applies the integer part.  The x87 stack depth is unchanged on exit.
+ */
+ static void x87_emit_ex2( struct aos_compilation *cp )
+ {
+ struct x86_reg st0 = x86_make_reg(file_x87, 0);
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ int stack = cp->func->x87_stack;
+
+ // set_fpu_round_neg_inf( cp );
+
+ x87_fld(cp->func, st0); /* a a */
+ x87_fprndint( cp->func ); /* int(a) a*/
+ x87_fsubr(cp->func, st1, st0); /* int(a) frc(a) */
+ x87_fxch(cp->func, st1); /* frc(a) int(a) */
+ x87_f2xm1(cp->func); /* (2^frc(a))-1 int(a) */
+ x87_fld1(cp->func); /* 1 (2^frc(a))-1 int(a) */
+ x87_faddp(cp->func, st1); /* 2^frac(a) int(a) */
+ x87_fscale(cp->func); /* (2^frac(a)*2^int(int(a))) int(a) */
+ /* 2^a int(a) */
+ x87_fstp(cp->func, st1); /* 2^a */
+
+ assert( stack == cp->func->x87_stack);
+
+ }
+
+ /* Debug helper invoked from generated code (see emit_print): prints
+ * 'msg' followed by the four floats at 'reg'.
+ */
+ static void PIPE_CDECL print_reg( const char *msg,
+ const float *reg )
+ {
+ debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]);
+ }
+
+ /* Debug aid: emit a call to print_reg() that prints shader register
+ * file[idx] at run time, prefixed by 'message'.
+ *
+ * All xmm registers are flushed and released first.  Both 'message' and
+ * the address of print_reg are embedded in the generated code as 32-bit
+ * immediates (the pointers are cast to int).
+ */
+ static void emit_print( struct aos_compilation *cp,
+ const char *message, /* must point to a static string! */
+ unsigned file,
+ unsigned idx )
+ {
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+ struct x86_reg arg = aos_get_shader_reg_ptr( cp, file, idx );
+ unsigned i;
+
+ /* There shouldn't be anything on the x87 stack. Can add this
+ * capacity later if need be.
+ */
+ assert(cp->func->x87_stack == 0);
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too. We're obviously not concerned about performance on this
+ * debug path, so here goes:
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+
+ /* Push the arguments:
+ */
+ x86_lea( cp->func, ecx, arg );
+ x86_push( cp->func, ecx );
+ x86_push_imm32( cp->func, (int)message );
+
+ /* Call the helper. Could call debug_printf directly, but
+ * print_reg is a nice place to put a breakpoint if need be.
+ */
+ x86_mov_reg_imm( cp->func, ecx, (int)print_reg );
+ x86_call( cp->func, ecx );
+ /* Drop the two pushed arguments again. */
+ x86_pop( cp->func, ecx );
+ x86_pop( cp->func, ecx );
+
+ /* Pop caller-save regs
+ */
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Done...
+ */
+ }
+
+/**
+ * The traditional instructions. All operate on internal registers
+ * and ignore write masks and swizzling issues.
+ */
+
+ /* ABS: dst = max(src0 * IMM_NEGS, src0), per channel (presumably
+ * IMM_NEGS holds -1.0 in every channel -- confirm against its table).
+ */
+ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movaps(cp->func, tmp, arg0);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, tmp, arg0);
+
+ store_dest(cp, &op->FullDstRegisters[0], tmp);
+ return TRUE;
+ }
+
+ /* ADD: dst = src0 + src1, per channel. */
+ static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_addps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+ /* COS: cosine of source channel 0, computed on the x87 and written to
+ * every write-enabled channel of the destination.
+ */
+ static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+ x87_fcos(cp->func);
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+ }
+
+ /* The dotproduct instructions don't really do that well in sse:
+ * XXX: produces wrong results -- disabled.
+ *
+ * DP3: multiplies the sources per channel, sums the first three products
+ * into the low element and stores that as a scalar.
+ */
+ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, dst, arg1);
+ /* Now the hard bit: sum the first 3 values:
+ */
+ sse_movhlps(cp->func, tmp, dst);
+ sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */
+ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
+ sse_addss(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+ /* DP4: multiplies the sources per channel, sums all four products into
+ * the low element and stores that as a scalar.
+ */
+ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, dst, arg1);
+
+ /* Now the hard bit: sum the values:
+ */
+ sse_movhlps(cp->func, tmp, dst);
+ sse_addps(cp->func, dst, tmp); /* a*x+c*z, b*y+d*w, ?, ? (movhlps only fills tmp's low half) */
+ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
+ sse_addss(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+ /* DPH: the three-channel dot product of the sources (as in DP3) plus
+ * src1.w, stored as a scalar.
+ */
+ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, dst, arg1);
+
+ /* Now the hard bit: sum the values (from DP3):
+ */
+ sse_movhlps(cp->func, tmp, dst);
+ sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */
+ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
+ sse_addss(cp->func, dst, tmp);
+ /* Finally add src1.w */
+ emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W));
+ sse_addss(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+ /* DST: distance vector, see the per-channel formulas below.  Each source
+ * is first blended with IMM_ONES so the channels it does not contribute
+ * are 1.0, then the two blended vectors are multiplied.
+ */
+ static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+
+ /* dst[0] = 1.0 * 1.0F; */
+ /* dst[1] = arg0[1] * arg1[1]; */
+ /* dst[2] = arg0[2] * 1.0; */
+ /* dst[3] = 1.0 * arg1[3]; */
+
+ emit_shuf_copy2(cp, dst, arg0, ones, SHUF(X,W,Z,Y));
+ emit_shuf_copy2(cp, tmp, arg1, ones, SHUF(X,Z,Y,W));
+ sse_mulps(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+ /* LG2: log2 of source channel 0 (fyl2x with y = 1), written to every
+ * write-enabled channel of the destination.
+ */
+ static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ x87_fld1(cp->func); /* 1 */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 1 */
+ x87_fyl2x(cp->func); /* log2(a0) */
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+ }
+
+
+ /* EX2: 2 raised to source channel 0, written to every write-enabled
+ * channel of the destination.
+ */
+ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+ x87_emit_ex2(cp);
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+ }
+
+
+ /* FLR: per-channel floor, done on the x87 with frndint after switching
+ * the rounding mode to round-toward-negative-infinity.  Only write-enabled
+ * channels are loaded and stored; results go straight to memory.
+ */
+ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+
+ set_fpu_round_neg_inf( cp );
+
+ /* Load all sources first to avoid aliasing
+ */
+ for (i = 3; i >= 0; i--) {
+ if (writemask & (1<<i)) {
+ x87_fld_src(cp, &op->FullSrcRegisters[0], i);
+ }
+ }
+
+ /* Loaded in reverse, so the lowest enabled channel is popped first. */
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ x87_fprndint( cp->func );
+ x87_fstp(cp->func, x86_make_disp(dst, i*4));
+ }
+ }
+
+ return TRUE;
+ }
+
+
+ /* RND: per-channel round-to-nearest, done on the x87 with frndint after
+ * switching the rounding mode.  Only write-enabled channels are loaded
+ * and stored; results go straight to memory.
+ */
+ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+
+ set_fpu_round_nearest( cp );
+
+ /* Load all sources first to avoid aliasing
+ */
+ for (i = 3; i >= 0; i--) {
+ if (writemask & (1<<i)) {
+ x87_fld_src(cp, &op->FullSrcRegisters[0], i);
+ }
+ }
+
+ /* Loaded in reverse, so the lowest enabled channel is popped first. */
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ x87_fprndint( cp->func );
+ x87_fstp(cp->func, x86_make_disp(dst, i*4));
+ }
+ }
+
+ return TRUE;
+ }
+
+
+ /* FRC: per-channel fractional part, a - floor(a), computed on the x87
+ * with the rounding mode set to round-toward-negative-infinity.  Only
+ * write-enabled channels are loaded and stored; results go straight to
+ * memory.
+ */
+ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ struct x86_reg st0 = x86_make_reg(file_x87, 0);
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+
+ set_fpu_round_neg_inf( cp );
+
+ /* suck all the source values onto the stack before writing out any
+ * dst, which may alias...
+ */
+ for (i = 3; i >= 0; i--) {
+ if (writemask & (1<<i)) {
+ x87_fld_src(cp, &op->FullSrcRegisters[0], i);
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ x87_fld(cp->func, st0); /* a a */
+ x87_fprndint( cp->func ); /* flr(a) a */
+ x87_fsubp(cp->func, st1); /* frc(a) */
+ x87_fstp(cp->func, x86_make_disp(dst, i*4));
+ }
+ }
+
+ return TRUE;
+ }
+
+
+
+
+
+
+ /* LIT: lighting coefficients, computed by calling out to a C helper.
+  *
+  * The helper receives (pushed in reverse order) the machine pointer, a
+  * pointer to the result vector, a pointer to the source vector and this
+  * instruction's LIT index.  For the first MAX_LIT_INFO LIT instructions
+  * the helper is fetched at run time from machine->lit_info[n].func;
+  * later ones call aos_do_lit directly.
+  *
+  * With a full write mask the helper writes straight into the
+  * destination's memory; otherwise it writes into machine->tmp[0] and the
+  * result is merged through store_dest().
+  */
+ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+    struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+    unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+    unsigned lit_count = cp->lit_count++;
+    struct x86_reg result, arg0;
+    unsigned i;
+
+ #if 1
+    /* For absolute correctness, need to spill/invalidate all XMM regs
+     * too.
+     */
+    for (i = 0; i < 8; i++) {
+       if (cp->xmm[i].dirty)
+          spill(cp, i);
+       aos_release_xmm_reg(cp, i);
+    }
+ #endif
+
+    if (writemask != TGSI_WRITEMASK_XYZW)
+       result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0]));
+    else
+       result = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+
+
+    /* The helper takes its source by address, so a source that ended up in
+     * an xmm register (swizzle/negate/abs) is parked in machine->tmp[1].
+     */
+    arg0 = fetch_src( cp, &op->FullSrcRegisters[0] );
+    if (arg0.file == file_XMM) {
+       struct x86_reg tmp = x86_make_disp(cp->machine_EDX,
+                                          Offset(struct aos_machine, tmp[1]));
+       sse_movaps( cp->func, tmp, arg0 );
+       arg0 = tmp;
+    }
+
+    /* fetch_src() may have cached internal constants (IMM_SWZ, IMM_NEGS)
+     * in xmm registers after the flush above.  The helper is ordinary
+     * cdecl code and may overwrite any xmm register, so forget those
+     * mappings now rather than trust their contents after the call.
+     */
+    for (i = 0; i < 8; i++) {
+       if (cp->xmm[i].dirty)
+          spill(cp, i);
+       aos_release_xmm_reg(cp, i);
+    }
+
+    /* Push caller-save (ie scratch) regs.
+     */
+    x86_cdecl_caller_push_regs( cp->func );
+
+    /* Push the arguments:
+     */
+    x86_push_imm32( cp->func, lit_count );
+
+    x86_lea( cp->func, ecx, arg0 );
+    x86_push( cp->func, ecx );
+
+    x86_lea( cp->func, ecx, result );
+    x86_push( cp->func, ecx );
+
+    x86_push( cp->func, cp->machine_EDX );
+
+    if (lit_count < MAX_LIT_INFO) {
+       x86_mov( cp->func, ecx, x86_make_disp( cp->machine_EDX,
+                                              Offset(struct aos_machine, lit_info) +
+                                              lit_count * sizeof(struct lit_info) +
+                                              Offset(struct lit_info, func)));
+    }
+    else {
+       x86_mov_reg_imm( cp->func, ecx, (int)aos_do_lit );
+    }
+
+    x86_call( cp->func, ecx );
+
+    /* Drop the four pushed arguments. */
+    x86_pop( cp->func, ecx ); /* fixme... */
+    x86_pop( cp->func, ecx );
+    x86_pop( cp->func, ecx );
+    x86_pop( cp->func, ecx );
+
+    x86_cdecl_caller_pop_regs( cp->func );
+
+    if (writemask != TGSI_WRITEMASK_XYZW) {
+       store_dest( cp,
+                   &op->FullDstRegisters[0],
+                   get_xmm_writable( cp, result ) );
+    }
+
+    return TRUE;
+ }
+
+ #if 0
+ /* Disabled alternative to emit_LIT that computes the result inline on
+ * the x87 instead of calling a C helper.
+ */
+ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+
+ if (writemask & TGSI_WRITEMASK_YZ) {
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ struct x86_reg st2 = x86_make_reg(file_x87, 2);
+
+ /* a1' = a1 <= 0 ? 1 : a1;
+ */
+ x87_fldz(cp->func); /* 0 */
+ #if 1
+ x87_fld1(cp->func); /* 1 0 */
+ #else
+ /* Correct but slow due to fp exceptions generated in fyl2x - fix me.
+ */
+ x87_fldz(cp->func); /* 1 0 */
+ #endif
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */
+ x87_fcomi(cp->func, st2); /* a1 1 0 */
+ x87_fcmovb(cp->func, st1); /* a1' 1 0 */
+ x87_fstp(cp->func, st1); /* a1' 0 */
+ x87_fstp(cp->func, st1); /* a1' */
+
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 3); /* a3 a1' */
+ x87_fxch(cp->func, st1); /* a1' a3 */
+
+
+ /* Compute pow(a1, a3)
+ */
+ x87_fyl2x(cp->func); /* a3*log2(a1) */
+ x87_emit_ex2( cp ); /* 2^(a3*log2(a1)) */
+
+
+ /* a0' = max2(a0, 0):
+ */
+ x87_fldz(cp->func); /* 0 r2 */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */
+ x87_fcomi(cp->func, st1);
+ x87_fcmovb(cp->func, st1); /* a0' 0 r2 */
+
+ x87_fst_or_nop(cp->func, writemask, 1, dst); /* result[1] = a0' */
+
+ x87_fcomi(cp->func, st1); /* a0' 0 r2 */
+ x87_fcmovnbe(cp->func, st2); /* r2' 0' r2 */
+
+ x87_fstp_or_pop(cp->func, writemask, 2, dst); /* 0 r2 */
+ x87_fpop(cp->func); /* r2 */
+ x87_fpop(cp->func);
+ }
+
+ /* Channels X and W are the constant 1.0 */
+ if (writemask & TGSI_WRITEMASK_XW) {
+ x87_fld1(cp->func);
+ x87_fst_or_nop(cp->func, writemask, 0, dst);
+ x87_fstp_or_pop(cp->func, writemask, 3, dst);
+ }
+
+ return TRUE;
+ }
+ #endif
+
+
+
+ /* MAX: dst = max(src0, src1), per channel. */
+ static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_maxps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+
+ /* MIN: dst = min(src0, src1), per channel. */
+ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_minps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+ /* MOV: dst = src0.  The source is brought into an overwritable xmm
+ * register, which store_dest() then merges or adopts as the destination.
+ */
+ static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ /* potentially nothing to do */
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+ /* MUL: dst = src0 * src1, per channel. */
+ static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+
+ /* MAD: dst = src0 * src1 + src2, per channel (separate multiply and add). */
+ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]);
+
+ /* If we can't clobber old contents of arg0, get a temporary & copy
+ * it there, then clobber it...
+ */
+ arg0 = get_xmm_writable(cp, arg0);
+
+ sse_mulps(cp->func, arg0, arg1);
+ sse_addps(cp->func, arg0, arg2);
+ store_dest(cp, &op->FullDstRegisters[0], arg0);
+ return TRUE;
+ }
+
+ /* A wrapper for powf().
+ * Makes sure it is cdecl and operates on floats.
+ * Its address is embedded in generated code by emit_POW.
+ */
+ static float PIPE_CDECL _powerf( float x, float y )
+ {
+ return powf( x, y );
+ }
+
+ /* Really not sufficient -- need to check for conditions that could
+ * generate inf/nan values, which will slow things down hugely.
+ *
+ * POW: src0.x raised to src1.x.  The active path calls _powerf() from
+ * generated code: both operands are stored as floats in 8 bytes reserved
+ * on the stack, and the result comes back on the x87 stack, from where it
+ * is written to every write-enabled destination channel.
+ */
+ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ #if 0
+ x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */
+ x87_fyl2x(cp->func); /* a1*log2(a0) */
+
+ x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */
+
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ #else
+ uint i;
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+ /* Reserve 8 bytes of stack for the two float arguments. */
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) );
+
+ x87_fld_src( cp, &op->FullSrcRegisters[1], 0 );
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) ); /* y */
+ x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); /* x */
+
+ /* tmp_EAX has been pushed & will be restored below */
+ x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _powerf );
+ x86_call( cp->func, cp->tmp_EAX );
+
+ /* Release the argument space again. */
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 8) );
+
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Note retval on x87 stack:
+ */
+ cp->func->x87_stack++;
+
+ x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+ #endif
+ return TRUE;
+ }
+
+
+ /* RCP: scalar reciprocal of the low element of src0.  Uses rcpss when
+ * cp->have_sse2 is set (no precision refinement yet), otherwise an exact
+ * divide of IMM_ONES by the source.
+ */
+ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+
+ if (cp->have_sse2) {
+ sse2_rcpss(cp->func, dst, arg0);
+ /* extend precision here...
+ */
+ }
+ else {
+ struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+ sse_movss(cp->func, dst, ones);
+ sse_divss(cp->func, dst, arg0);
+ }
+
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+
+ /* Although rsqrtps() and rcpps() are low precision on some/all SSE
+ * implementations, it is possible to improve its precision at
+ * fairly low cost, using a newton/raphson step, as below:
+ *
+ * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
+ * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
+ * or:
+ * x1 = rsqrtps(a) * [1.5 - .5 * a * rsqrtps(a) * rsqrtps(a)]
+ *
+ *
+ * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
+ *
+ * RSQ: scalar reciprocal square root of the low element of src0, using
+ * the last form above.  The two constants are read from memory at
+ * IMM_RSQ and IMM_RSQ + 4 bytes (-0.5 and 1.5 according to the local
+ * names).  The "if (0)" branch is the unrefined rsqrtss-only variant.
+ */
+ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+
+ if (0) {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
+ sse_rsqrtss(cp->func, r, arg0);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+ return TRUE;
+ }
+ else {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
+
+ struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );
+ struct x86_reg one_point_five = x86_make_disp( neg_half, 4 );
+ struct x86_reg src = get_xmm_writable( cp, arg0 );
+
+ sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */
+ sse_mulss( cp->func, src, neg_half ); /* -.5 * a */
+ sse_mulss( cp->func, src, r ); /* -.5 * a * r */
+ sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */
+ sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */
+ sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */
+
+ store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+ return TRUE;
+ }
+ }
+
+
+ /* SGE: per channel, the not-less-than compare mask of src0 against src1
+ * ANDed with IMM_ONES (presumably yielding 1.0 where src0 >= src1 and 0.0
+ * elsewhere -- confirm IMM_ONES holds 1.0 in every channel).
+ */
+ static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_cmpps(cp->func, dst, arg1, cc_NotLessThan);
+ sse_andps(cp->func, dst, ones);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+ /* SIN: sine of source channel 0, computed on the x87 and written to
+ * every write-enabled channel of the destination.
+ */
+ static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+ x87_fsin(cp->func);
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+ }
+
+
+
+ /* SLT: per channel, the less-than compare mask of src0 against src1
+ * ANDed with IMM_ONES (presumably yielding 1.0 where src0 < src1 and 0.0
+ * elsewhere -- confirm IMM_ONES holds 1.0 in every channel).
+ */
+ static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+ {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
+
+ sse_cmpps(cp->func, dst, arg1, cc_LessThan);
+ sse_andps(cp->func, dst, ones);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+ }
+
+static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   /* Per channel: dst = src0 - src1 */
+   struct x86_reg minuend = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg subtrahend = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg difference = get_xmm_writable(cp, minuend);
+
+   sse_subps(cp->func, difference, subtrahend);
+
+   store_dest(cp, &op->FullDstRegisters[0], difference);
+   return TRUE;
+}
+
+
+static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   /* Cross product dst.xyz = a x b; dst.w is undefined.
+    *
+    *    dst[0] = a[1] * b[2] - a[2] * b[1]
+    *    dst[1] = a[2] * b[0] - a[0] * b[2]
+    *    dst[2] = a[0] * b[1] - a[1] * b[0]
+    *
+    * Both products are formed with one operand rotated (Y,Z,X), which
+    * yields the result rotated by one channel; the final shuffle
+    * rotates it back into place.
+    */
+   struct x86_reg a = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg b = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg subtract = aos_get_xmm_reg(cp);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+
+   /* result = a * b.yzx */
+   emit_pshufd(cp, result, b, SHUF(Y, Z, X, W));
+   sse_mulps(cp->func, result, a);
+
+   /* subtract = a.yzx * b */
+   emit_pshufd(cp, subtract, a, SHUF(Y, Z, X, W));
+   sse_mulps(cp->func, subtract, b);
+
+   /* result = (a * b.yzx - a.yzx * b).yzx */
+   sse_subps(cp->func, result, subtract);
+   sse_shufps(cp->func, result, result, SHUF(Y, Z, X, W));
+
+   aos_release_xmm_reg(cp, subtract.idx);
+   store_dest(cp, &op->FullDstRegisters[0], result);
+   return TRUE;
+}
+
+
+
+static boolean
+emit_instruction( struct aos_compilation *cp,
+                  struct tgsi_full_instruction *inst )
+{
+   /* Dispatch one TGSI instruction to its code emitter.  Returns FALSE
+    * for every opcode that cannot be translated, which makes
+    * build_vertex_program() give up on this shader.
+    */
+   x87_assert_stack_empty(cp->func);
+
+   switch( inst->Instruction.Opcode ) {
+
+   /* Opcodes with an emitter: */
+   case TGSI_OPCODE_MOV:          return emit_MOV(cp, inst);
+   case TGSI_OPCODE_LIT:          return emit_LIT(cp, inst);
+   case TGSI_OPCODE_RCP:          return emit_RCP(cp, inst);
+   case TGSI_OPCODE_RSQ:          return emit_RSQ(cp, inst);
+   case TGSI_OPCODE_MUL:          return emit_MUL(cp, inst);
+   case TGSI_OPCODE_ADD:          return emit_ADD(cp, inst);
+   case TGSI_OPCODE_DP3:          return emit_DP3(cp, inst);
+   case TGSI_OPCODE_DP4:          return emit_DP4(cp, inst);
+   case TGSI_OPCODE_DST:          return emit_DST(cp, inst);
+   case TGSI_OPCODE_MIN:          return emit_MIN(cp, inst);
+   case TGSI_OPCODE_MAX:          return emit_MAX(cp, inst);
+   case TGSI_OPCODE_SLT:          return emit_SLT(cp, inst);
+   case TGSI_OPCODE_SGE:          return emit_SGE(cp, inst);
+   case TGSI_OPCODE_MAD:          return emit_MAD(cp, inst);
+   case TGSI_OPCODE_SUB:          return emit_SUB(cp, inst);
+   case TGSI_OPCODE_FRAC:         return emit_FRC(cp, inst);
+   case TGSI_OPCODE_FLOOR:        return emit_FLR(cp, inst);
+   case TGSI_OPCODE_ROUND:        return emit_RND(cp, inst);
+   case TGSI_OPCODE_LOGBASE2:     return emit_LG2(cp, inst);
+   case TGSI_OPCODE_POWER:        return emit_POW(cp, inst);
+   case TGSI_OPCODE_CROSSPRODUCT: return emit_XPD(cp, inst);
+   case TGSI_OPCODE_ABS:          return emit_ABS(cp, inst);
+   case TGSI_OPCODE_DPH:          return emit_DPH(cp, inst);
+   case TGSI_OPCODE_COS:          return emit_COS(cp, inst);
+   case TGSI_OPCODE_SIN:          return emit_SIN(cp, inst);
+
+   /* The end-of-shader marker needs no code of its own. */
+   case TGSI_OPCODE_END:          return TRUE;
+
+   /* Recognised opcodes whose emitters are currently switched off
+    * (emit_EXP, emit_LOG, emit_LERP, emit_CLAMP, emit_EX2).  EX2 seems
+    * to fail for "larger" exponents; see glean tvertProg1's EX2 test.
+    */
+   case TGSI_OPCODE_EXP:
+   case TGSI_OPCODE_LOG:
+   case TGSI_OPCODE_LERP:
+   case TGSI_OPCODE_CLAMP:
+   case TGSI_OPCODE_EXPBASE2:
+      return FALSE;
+
+   /* Anything else is not handled by this backend. */
+   default:
+      return FALSE;
+   }
+}
+
+
+static boolean emit_viewport( struct aos_compilation *cp )
+{
+   /* Viewport transform of the position output, done in place:
+    *
+    *    position = position * machine->scale + machine->translate
+    *
+    * Always returns TRUE.
+    */
+   struct x86_reg position = aos_get_shader_reg_xmm(cp,
+                                                    TGSI_FILE_OUTPUT,
+                                                    cp->vaos->draw->vs.position_output );
+
+   /* Both vectors are read straight out of the aos_machine that EDX
+    * points at.
+    */
+   struct x86_reg vp_translate = x86_make_disp(cp->machine_EDX,
+                                               Offset(struct aos_machine, translate));
+   struct x86_reg vp_scale = x86_make_disp(cp->machine_EDX,
+                                           Offset(struct aos_machine, scale));
+
+   sse_mulps(cp->func, position, vp_scale);
+   sse_addps(cp->func, position, vp_translate);
+
+   /* Re-register the xmm register as the (dirty) position output. */
+   aos_adopt_xmm_reg( cp,
+                      position,
+                      TGSI_FILE_OUTPUT,
+                      cp->vaos->draw->vs.position_output,
+                      TRUE );
+   return TRUE;
+}
+
+
+/* Debug-only variant of the viewport transform that also performs the
+ * perspective divide, so that results can be inspected on softpipe.
+ * No real clipping happens here; the backend is assumed to cope with
+ * it during rasterization.
+ */
+static boolean emit_rhw_viewport( struct aos_compilation *cp )
+{
+   struct x86_reg rhw = aos_get_xmm_reg(cp);
+   struct x86_reg position = aos_get_shader_reg_xmm(cp,
+                                                    TGSI_FILE_OUTPUT,
+                                                    cp->vaos->draw->vs.position_output);
+
+   struct x86_reg vp_translate = x86_make_disp(cp->machine_EDX,
+                                               Offset(struct aos_machine, translate));
+   struct x86_reg vp_scale = x86_make_disp(cp->machine_EDX,
+                                           Offset(struct aos_machine, scale));
+
+   /* rhw = 1/w, replicated into every channel */
+   emit_pshufd(cp, rhw, position, SHUF(W, W, W, W));
+   sse2_rcpss(cp->func, rhw, rhw);
+   sse_shufps(cp->func, rhw, rhw, SHUF(X, X, X, X));
+
+   /* position = position * scale * (1/w) + translate */
+   sse_mulps(cp->func, position, vp_scale);
+   sse_mulps(cp->func, position, rhw);
+   sse_addps(cp->func, position, vp_translate);
+
+   /* Overwrite position[3] with the value held in rhw, i.e. the
+    * reciprocal 1/w rather than w itself.
+    */
+   mask_write(cp, position, rhw, TGSI_WRITEMASK_W);
+
+   /* Re-register the xmm register as the (dirty) position output. */
+   aos_adopt_xmm_reg( cp,
+                      position,
+                      TGSI_FILE_OUTPUT,
+                      cp->vaos->draw->vs.position_output,
+                      TRUE );
+   return TRUE;
+}
+
+
+#if 0 /* Disabled: would copy a shader immediate into the machine.
+ * NOTE(review): stale as written.  struct draw_vs_varient_aos_sse has
+ * no 'machine' member and struct aos_machine has no 'immediate' array;
+ * vaos_run_elts/vaos_run_linear instead point machine->immediates at
+ * vs->immediates.  It would not compile if simply re-enabled.
+ */
+static boolean note_immediate( struct aos_compilation *cp,
+ struct tgsi_full_immediate *imm )
+{
+ unsigned pos = cp->num_immediates++;
+ unsigned j;
+
+ for (j = 0; j < imm->Immediate.Size; j++) {
+ cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ }
+
+ return TRUE;
+}
+#endif
+
+
+
+
+static void find_last_write_outputs( struct aos_compilation *cp )
+{
+   /* Pre-pass over the shader tokens: for each output register record
+    * the ordinal of the last instruction that writes to it, in
+    * cp->output_last_write[].
+    *
+    * NOTE(review): the ordinals here are 0-based and advance only on
+    * instruction tokens, whereas cp->insn_counter starts at 1 and is
+    * also bumped for other tokens and for fetched inputs -- confirm
+    * that consumers of output_last_write compare like with like.
+    */
+   struct tgsi_parse_context parse;
+   unsigned insn_nr = 0;
+   unsigned dst;
+   unsigned out_idx;
+
+   tgsi_parse_init( &parse, cp->vaos->base.vs->state.tokens );
+
+   for (;;) {
+      if (tgsi_parse_end_of_tokens( &parse ))
+         break;
+
+      tgsi_parse_token( &parse );
+
+      /* Declarations and immediates are of no interest here. */
+      if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+
+         for (dst = 0; dst < TGSI_FULL_MAX_DST_REGISTERS; dst++) {
+            if (parse.FullToken.FullInstruction.FullDstRegisters[dst].DstRegister.File !=
+                TGSI_FILE_OUTPUT)
+               continue;
+
+            /* A later write simply replaces the earlier ordinal. */
+            out_idx = parse.FullToken.FullInstruction.FullDstRegisters[dst].DstRegister.Index;
+            cp->output_last_write[out_idx] = insn_nr;
+         }
+
+         insn_nr++;
+      }
+   }
+
+   tgsi_parse_free( &parse );
+}
+
+
+#define ARG_MACHINE 1 /* 1-based argument slots handed to x86_fn_arg(): the struct aos_machine pointer */
+#define ARG_START_ELTS 2 /* 'start' for the linear function, 'elts' pointer for the indexed one */
+#define ARG_COUNT 3 /* number of vertices to process */
+#define ARG_OUTBUF 4 /* output_buffer pointer */
+
+/* Generate the x86/SSE code for one flavour of the vertex shader into
+ * varient->func[]: the linear (start/count) entry point when 'linear'
+ * is TRUE, the indexed (elts) one otherwise.
+ *
+ * The emitted function saves EBX/ESI/EBP, loads its four stack
+ * arguments, returns early when count is zero, and otherwise loops once
+ * per vertex: fetch inputs, run the translated shader, apply the
+ * optional viewport transform, emit outputs, advance to the next vertex.
+ *
+ * Returns FALSE if an input format, an instruction or an output format
+ * cannot be translated, and !cp.error otherwise.  The x86_function is
+ * not released here on failure; varient_aos_sse() does that.
+ */
+static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
+ boolean linear )
+{
+ struct tgsi_parse_context parse;
+ struct aos_compilation cp;
+ unsigned fixup, label;
+
+ tgsi_parse_init( &parse, varient->base.vs->state.tokens );
+
+ memset(&cp, 0, sizeof(cp));
+
+ cp.insn_counter = 1;
+ cp.vaos = varient;
+ cp.have_sse2 = 1; /* NOTE(review): set unconditionally; no CPU feature test is visible here */
+ cp.func = &varient->func[ linear ? 0 : 1 ]; /* func[0] = linear, func[1] = elts */
+
+ cp.tmp_EAX = x86_make_reg(file_REG32, reg_AX);
+ cp.idx_EBX = x86_make_reg(file_REG32, reg_BX);
+ cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
+ cp.machine_EDX = x86_make_reg(file_REG32, reg_DX);
+ cp.count_ESI = x86_make_reg(file_REG32, reg_SI);
+ cp.temp_EBP = x86_make_reg(file_REG32, reg_BP);
+ cp.stack_ESP = x86_make_reg( file_REG32, reg_SP );
+
+ x86_init_func(cp.func);
+
+ find_last_write_outputs(&cp);
+
+ x86_push(cp.func, cp.idx_EBX); /* these three are popped again, in reverse order, before x86_ret */
+ x86_push(cp.func, cp.count_ESI);
+ x86_push(cp.func, cp.temp_EBP);
+
+
+ /* Load arguments into regs:
+ */
+ x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_MACHINE));
+ x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS));
+ x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT));
+ x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF));
+
+
+ /* Compare count to zero and possibly bail.  The forward jump lands
+ * after restore_fpu_state() below, so the FPU state is neither saved
+ * nor restored on that path.
+ */
+ x86_xor(cp.func, cp.tmp_EAX, cp.tmp_EAX);
+ x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX);
+ fixup = x86_jcc_forward(cp.func, cc_E);
+
+
+ save_fpu_state( &cp );
+ set_fpu_round_nearest( &cp );
+
+ /* Note address for loop jump
+ */
+ label = x86_get_label(cp.func);
+ {
+ /* Fetch inputs... TODO: fetch lazily...
+ */
+ if (!aos_fetch_inputs( &cp, linear ))
+ goto fail;
+
+ /* Emit the shader: one token at a time, stopping at the first
+ * instruction that cannot be translated or once cp.error is set.
+ */
+ while( !tgsi_parse_end_of_tokens( &parse ) && !cp.error )
+ {
+ tgsi_parse_token( &parse );
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+#if 0
+ if (!note_immediate( &cp, &parse.FullToken.FullImmediate ))
+ goto fail;
+#endif
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (DISASSEM)
+ tgsi_dump_instruction( &parse.FullToken.FullInstruction, cp.insn_counter );
+
+ if (!emit_instruction( &cp, &parse.FullToken.FullInstruction ))
+ goto fail;
+ break;
+ }
+
+ x87_assert_stack_empty(cp.func);
+ cp.insn_counter++; /* bumped for every token, not only for instructions */
+
+ if (DISASSEM)
+ debug_printf("\n");
+ }
+
+
+ {
+ unsigned i;
+ for (i = 0; i < 8; i++) {
+ if (cp.xmm[i].file != TGSI_FILE_OUTPUT) { /* forget every xmm register that does not hold a shader output */
+ cp.xmm[i].file = TGSI_FILE_NULL;
+ cp.xmm[i].dirty = 0;
+ }
+ }
+ }
+
+ if (cp.error)
+ goto fail;
+
+ if (cp.vaos->base.key.clip) {
+ /* not really handling clipping, just do the rhw so we can
+ * see the results...
+ */
+ emit_rhw_viewport(&cp); /* result not checked; the function always returns TRUE */
+ }
+ else if (cp.vaos->base.key.viewport) {
+ emit_viewport(&cp); /* result not checked; the function always returns TRUE */
+ }
+
+ /* Emit output... TODO: do this eagerly after the last write to a
+ * given output.
+ */
+ if (!aos_emit_outputs( &cp ))
+ goto fail;
+
+
+ /* Next vertex: advance the output pointer by one output stride.
+ */
+ x86_lea(cp.func,
+ cp.outbuf_ECX,
+ x86_make_disp(cp.outbuf_ECX,
+ cp.vaos->base.key.output_stride));
+
+ /* Incr index: EBX is a vertex number in the linear case and is
+ * bumped by one; otherwise it is advanced by 4 bytes (presumably
+ * one 'unsigned' entry of the elts array on x86).
+ */
+ if (linear) {
+ x86_inc(cp.func, cp.idx_EBX);
+ }
+ else {
+ x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4));
+ }
+
+ }
+ /* decr count, loop if not zero
+ */
+ x86_dec(cp.func, cp.count_ESI);
+ x86_jcc(cp.func, cc_NZ, label);
+
+ restore_fpu_state(&cp);
+
+ /* Land forward jump here:
+ */
+ x86_fixup_fwd_jump(cp.func, fixup);
+
+ /* Exit mmx state?
+ */
+ if (cp.func->need_emms)
+ mmx_emms(cp.func);
+
+ x86_pop(cp.func, cp.temp_EBP);
+ x86_pop(cp.func, cp.count_ESI);
+ x86_pop(cp.func, cp.idx_EBX);
+
+ x87_assert_stack_empty(cp.func);
+ x86_ret(cp.func);
+
+ tgsi_parse_free( &parse );
+ return !cp.error;
+
+ fail: /* only the parse state is freed here; the partly built function is left to the caller */
+ tgsi_parse_free( &parse );
+ return FALSE;
+}
+
+
+
+static void vaos_set_buffer( struct draw_vs_varient *varient,
+                             unsigned buf,
+                             const void *ptr,
+                             unsigned stride )
+{
+   /* Bind vertex buffer 'buf': every input element sourced from that
+    * buffer gets its base pointer (buffer start plus the element's
+    * offset) and its stride updated.  Elements that use other buffers
+    * are left alone.
+    */
+   struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+   unsigned attr;
+
+   for (attr = 0; attr < vaos->base.key.nr_inputs; attr++) {
+      if (vaos->base.key.element[attr].in.buffer != buf)
+         continue;
+
+      vaos->attrib[attr].input_ptr = ((char *)ptr +
+                                      vaos->base.key.element[attr].in.offset);
+      vaos->attrib[attr].input_stride = stride;
+   }
+}
+
+
+
+static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
+                                      const unsigned *elts,
+                                      unsigned count,
+                                      void *output_buffer )
+{
+   /* Indexed entry point: refresh the per-run fields of the shared
+    * aos_machine, then hand over to the generated elts function.
+    */
+   struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+   struct aos_machine *m = vaos->draw->vs.aos_machine;
+
+   /* Pointers to the data the generated code reads */
+   m->attrib = vaos->attrib;
+   m->immediates = vaos->base.vs->immediates;
+   m->constants = vaos->draw->vs.aligned_constants;
+
+   /* IMM_PSIZE is not really an immediate; it tracks the current
+    * rasterizer point size.
+    */
+   m->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+
+   vaos->gen_run_elts( m, elts, count, output_buffer );
+}
+
+static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
+                                        unsigned start,
+                                        unsigned count,
+                                        void *output_buffer )
+{
+   /* Linear entry point: refresh the per-run fields of the shared
+    * aos_machine, then hand over to the generated linear function.
+    */
+   struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+   struct aos_machine *m = vaos->draw->vs.aos_machine;
+
+   /* Pointers to the data the generated code reads */
+   m->attrib = vaos->attrib;
+   m->immediates = vaos->base.vs->immediates;
+   m->constants = vaos->draw->vs.aligned_constants;
+
+   /* IMM_PSIZE is not really an immediate; it tracks the current
+    * rasterizer point size.
+    */
+   m->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+
+   vaos->gen_run_linear( m, start, count, output_buffer );
+}
+
+
+
+static void vaos_destroy( struct draw_vs_varient *varient )
+{
+   /* Tear down in this order: the attrib array, both generated
+    * functions (linear first, then elts), and finally the varient.
+    */
+   struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+   unsigned i;
+
+   FREE( vaos->attrib );
+
+   for (i = 0; i < 2; i++)
+      x86_release_func( &vaos->func[i] );
+
+   FREE(vaos);
+}
+
+
+
+static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
+                                                const struct draw_vs_varient_key *key )
+{
+   /* Create an SSE code-generating varient for (vs, key).  Returns NULL
+    * if allocation fails or either flavour of the shader cannot be
+    * translated; everything allocated so far is released in that case.
+    */
+   struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse);
+
+   if (vaos == NULL)
+      goto fail;
+
+   vaos->draw = vs->draw;
+
+   vaos->base.vs = vs;
+   vaos->base.key = *key;
+   vaos->base.run_elts = vaos_run_elts;
+   vaos->base.run_linear = vaos_run_linear;
+   vaos->base.set_input = vaos_set_buffer;
+   vaos->base.destroy = vaos_destroy;
+
+   /* One {pointer, stride} record per vertex input */
+   vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) );
+   if (vaos->attrib == NULL)
+      goto fail;
+
+#if 0
+   tgsi_dump(vs->state.tokens, 0);
+#endif
+
+   /* Generate the linear flavour into func[0], then the elts flavour
+    * into func[1].
+    */
+   if (!build_vertex_program( vaos, TRUE ) ||
+       !build_vertex_program( vaos, FALSE ))
+      goto fail;
+
+   vaos->gen_run_linear = (vaos_run_linear_func)x86_get_func(&vaos->func[0]);
+   if (vaos->gen_run_linear == NULL)
+      goto fail;
+
+   vaos->gen_run_elts = (vaos_run_elts_func)x86_get_func(&vaos->func[1]);
+   if (vaos->gen_run_elts == NULL)
+      goto fail;
+
+   return &vaos->base;
+
+ fail:
+   if (vaos) {
+      if (vaos->attrib)
+         FREE(vaos->attrib);
+
+      x86_release_func( &vaos->func[0] );
+      x86_release_func( &vaos->func[1] );
+   }
+
+   FREE(vaos);
+
+   return NULL;
+}
+
+
+struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
+                                                 const struct draw_vs_varient_key *key )
+{
+   /* Prefer the SSE code generator; if it cannot handle this shader or
+    * key, fall back to the generic varient.
+    */
+   struct draw_vs_varient *sse_varient = varient_aos_sse( vs, key );
+
+   if (sse_varient != NULL)
+      return sse_varient;
+
+   return draw_vs_varient_generic( vs, key );
+}
+
+
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
new file mode 100644
index 0000000000..64e021ff6b
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -0,0 +1,248 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef DRAW_VS_AOS_H
+#define DRAW_VS_AOS_H
+
+#include "pipe/p_config.h"
+
+#ifdef PIPE_ARCH_X86
+
+struct tgsi_token;
+struct x86_function;
+
+#include "pipe/p_state.h"
+#include "rtasm/rtasm_x86sse.h"
+
+
+
+
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+#define MAX_INPUTS PIPE_MAX_ATTRIBS
+#define MAX_OUTPUTS PIPE_MAX_ATTRIBS
+#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */
+#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */
+#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */
+#define MAX_INTERNALS 8
+
+#define AOS_FILE_INTERNAL TGSI_FILE_COUNT
+
+#define FPU_RND_NEG 1
+#define FPU_RND_NEAREST 2
+
+struct aos_machine;
+typedef void (PIPE_CDECL *lit_func)( struct aos_machine *,
+ float *result,
+ const float *in,
+ unsigned count );
+
+void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count );
+/* NOTE(review): presumably a cached lookup table for one LIT specular
+ * exponent -- confirm against the users in draw_vs_aos.c / aos_do_lit.
+ */
+struct shine_tab {
+ float exponent; /* presumably the exponent this table was built for -- TODO confirm */
+ float values[258];
+ unsigned last_used; /* presumably a use stamp compared against aos_machine.now -- TODO confirm */
+};
+/* Pairs a LIT implementation with the shine table it uses.
+ */
+struct lit_info {
+ lit_func func; /* function with the lit_func signature, e.g. aos_do_lit */
+ struct shine_tab *shine_tab; /* presumably an entry of aos_machine.shine_tab[] -- TODO confirm */
+};
+
+#define MAX_SHINE_TAB 4
+#define MAX_LIT_INFO 16
+/* Where one vertex input lives; filled in by vaos_set_buffer() and
+ * read by the generated fetch code (see get_src_ptr()).
+ */
+struct aos_attrib {
+ const void *input_ptr; /* buffer base plus the element's offset */
+ unsigned input_stride; /* multiplied by the vertex index to reach a vertex */
+};
+
+
+
+
+/* This is the temporary storage used by all the aos_sse vs varients.
+ * Create one per context and reuse by passing a pointer in at
+ * vs_varient creation??
+ *
+ * The generated code addresses members through Offset(struct
+ * aos_machine, ...) relative to the machine pointer it is passed.
+ */
+struct aos_machine {
+ float input [MAX_INPUTS ][4];
+ float output [MAX_OUTPUTS ][4];
+ float temp [MAX_TEMPS ][4];
+ float internal [MAX_INTERNALS ][4]; /* indexed by the IMM_* values; the IMM_PSIZE slot is rewritten before every run */
+
+ float scale[4]; /* viewport */
+ float translate[4]; /* viewport */
+
+ float tmp[2][4]; /* scratch space for LIT */
+
+ struct shine_tab shine_tab[MAX_SHINE_TAB];
+ struct lit_info lit_info[MAX_LIT_INFO];
+ unsigned now; /* NOTE(review): presumably the clock behind shine_tab.last_used -- confirm */
+
+
+ ushort fpu_rnd_nearest;
+ ushort fpu_rnd_neg_inf;
+ ushort fpu_restore;
+ ushort fpucntl; /* one of FPU_* above */
+
+ const float (*immediates)[4]; /* points to shader data */
+ const float (*constants)[4]; /* points to draw data */
+
+ const struct aos_attrib *attrib; /* points to the running varient's attrib[] (set in vaos_run_elts/vaos_run_linear) */
+};
+
+
+
+
+struct aos_compilation { /* state for translating one shader into one x86_function; zeroed and filled in by build_vertex_program() */
+ struct x86_function *func; /* function being generated */
+ struct draw_vs_varient_aos_sse *vaos; /* varient the code is generated for */
+
+ unsigned insn_counter; /* starts at 1; bumped per shader token, fetched input and emitted output */
+ unsigned num_immediates;
+ unsigned count;
+ unsigned lit_count;
+
+ struct { /* what each of the eight xmm registers currently holds */
+ unsigned idx:16;
+ unsigned file:8; /* a TGSI_FILE_* value (TGSI_FILE_NULL when cleared), presumably also AOS_FILE_INTERNAL */
+ unsigned dirty:8;
+ unsigned last_used;
+ } xmm[8];
+
+ unsigned x86_reg[2]; /* one of X86_* */
+
+ boolean input_fetched[PIPE_MAX_ATTRIBS];
+ unsigned output_last_write[PIPE_MAX_ATTRIBS]; /* filled by find_last_write_outputs(): ordinal of the last instruction writing each output */
+
+ boolean have_sse2;
+ boolean error; /* set by ERROR(); makes build_vertex_program() fail */
+ short fpucntl;
+
+ /* these are actually known values, but putting them in a struct
+ * like this is helpful to keep them in sync across the file.
+ */
+ struct x86_reg tmp_EAX;
+ struct x86_reg idx_EBX; /* either start+i or &elt[i] */
+ struct x86_reg outbuf_ECX;
+ struct x86_reg machine_EDX;
+ struct x86_reg count_ESI; /* decrements to zero */
+ struct x86_reg temp_EBP;
+ struct x86_reg stack_ESP;
+};
+
+struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
+void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx );
+
+void aos_adopt_xmm_reg( struct aos_compilation *cp,
+ struct x86_reg reg,
+ unsigned file,
+ unsigned idx,
+ unsigned dirty );
+
+struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx );
+
+boolean aos_fetch_inputs( struct aos_compilation *cp,
+ boolean linear );
+
+boolean aos_emit_outputs( struct aos_compilation *cp );
+
+
+#define IMM_ONES 0 /* 1, 1,1,1 */
+#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */
+#define IMM_IDENTITY 2 /* 0, 0,0,1 */
+#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
+#define IMM_255 4 /* 255, 255, 255, 255 */
+#define IMM_NEGS 5 /* -1,-1,-1,-1 */
+#define IMM_RSQ 6 /* -.5,1.5,_,_ */
+#define IMM_PSIZE 7 /* not really an immediate - updated each run */
+
+struct x86_reg aos_get_internal( struct aos_compilation *cp,
+ unsigned imm );
+struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
+ unsigned imm );
+
+
+/* Flag the compilation 'cp' (a struct aos_compilation pointer) as
+ * failed.  The diagnostic is compiled but disabled by the 'if (0)', so
+ * 'msg' is still type-checked.  Both parameters are parenthesized so
+ * that any pointer-valued expression can be passed as 'cp'.
+ */
+#define ERROR(cp, msg) \
+do { \
+ if (0) debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, (msg)); \
+ (cp)->error = 1; \
+} while (0)
+
+
+#define X86_NULL 0
+#define X86_IMMEDIATES 1
+#define X86_CONSTANTS 2
+#define X86_ATTRIBS 3
+
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+ unsigned which_reg,
+ unsigned value );
+
+
+typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer);
+
+typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
+ unsigned start,
+ unsigned count,
+ void *output_buffer);
+
+
+struct draw_vs_varient_aos_sse { /* a draw_vs_varient backed by run-time generated x86/SSE code */
+ struct draw_vs_varient base; /* first member: the code casts between base and derived pointers */
+ struct draw_context *draw;
+
+ struct aos_attrib *attrib; /* key.nr_inputs entries, updated by vaos_set_buffer() */
+
+ vaos_run_linear_func gen_run_linear; /* entry point of func[0] */
+ vaos_run_elts_func gen_run_elts; /* entry point of func[1] */
+
+
+ struct x86_function func[2]; /* [0] = linear flavour, [1] = elts flavour */
+};
+
+
+#endif
+
+#endif
+
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
new file mode 100644
index 0000000000..6b92811870
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -0,0 +1,325 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi/exec/tgsi_exec.h"
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+#include "draw_vertex.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+#ifdef PIPE_ARCH_X86
+
+/* Note - don't yet have to worry about interacting with the code in
+ * draw_vs_aos.c as there is no intermingling of generated code...
+ * That may have to change, we'll see.
+ */
+static void emit_load_R32G32B32A32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movups(cp->func, data, src_ptr); /* all four floats with a single unaligned 16-byte load */
+}
+/* Load three floats and supply w = 1 from IMM_IDENTITY (0,0,0,1),
+ * without reading past the third float of the source.
+ */
+static void emit_load_R32G32B32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); /* data = (z, 0, 0, 0) */
+ sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); /* low half from data, high half from identity: (z, 0, 0, 1) */
+ sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) ); /* (0, 0, z, 1) */
+ sse_movlps(cp->func, data, src_ptr); /* low half from memory: (x, y, z, 1) */
+}
+/* Load two floats; z and w come from IMM_IDENTITY, giving (x, y, 0, 1).
+ */
+static void emit_load_R32G32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); /* data = (0, 0, 0, 1) */
+ sse_movlps(cp->func, data, src_ptr); /* replace the low half with x, y from memory */
+}
+
+/* Load a single float and expand it to (x, 0, 0, 1).
+ */
+static void emit_load_R32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movss(cp->func, data, src_ptr); /* (x, 0, 0, 0): a movss load from memory clears the upper lanes */
+ sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); /* OR in (0, 0, 0, 1) to set w = 1 */
+}
+
+/* Load four unsigned bytes and convert them to four floats in [0, 1].
+ */
+static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movss(cp->func, data, src_ptr); /* fetch the 4 packed bytes as one 32-bit scalar */
+ sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); /* interleave with the zero low bytes of (0,0,0,1): bytes -> 16-bit words */
+ sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); /* and again: 16-bit words -> 32-bit integers */
+ sse2_cvtdq2ps(cp->func, data, data); /* integers -> floats in 0..255 */
+ sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255)); /* scale by 1/255 */
+}
+
+
+
+static void get_src_ptr( struct aos_compilation *cp,
+                         struct x86_reg src,
+                         struct x86_reg elt,
+                         unsigned a )
+{
+   /* Emit code leaving the address of attribute 'a' of vertex 'elt' in
+    * 'src':
+    *
+    *    src = attrib[a].input_stride * elt + attrib[a].input_ptr
+    */
+   struct x86_reg entry = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ),
+                                        a * sizeof(struct aos_attrib));
+
+   /* Memory operands for the two fields of that aos_attrib record */
+   struct x86_reg stride_field = x86_make_disp(entry,
+                                               Offset(struct aos_attrib, input_stride));
+   struct x86_reg base_field = x86_make_disp(entry,
+                                             Offset(struct aos_attrib, input_ptr));
+
+   x86_mov(cp->func, src, stride_field);
+   x86_imul(cp->func, src, elt);
+   x86_add(cp->func, src, base_field);
+}
+
+
+/* Extended swizzles? Maybe later.
+ *
+ * Plain shufps wrapper.  shufps takes its two low result lanes from
+ * 'dest' and its two high lanes from 'src', so this is a true swizzle
+ * only when dest and src are the same register, as they are at the
+ * call sites in load_input() and emit_output().
+ */
+static void emit_swizzle( struct aos_compilation *cp,
+ struct x86_reg dest,
+ struct x86_reg src,
+ ubyte shuffle )
+{
+ sse_shufps(cp->func, dest, src, shuffle);
+}
+
+
+static boolean load_input( struct aos_compilation *cp,
+                           unsigned idx,
+                           boolean linear )
+{
+   /* Emit the fetch of vertex input 'idx' into a fresh xmm register,
+    * converting from the element's source format to four floats.
+    * Returns FALSE (and flags cp->error) for formats without a loader.
+    */
+   unsigned fmt = cp->vaos->base.key.element[idx].in.format;
+   struct x86_reg addr = cp->tmp_EAX;
+   struct x86_reg vec = aos_get_xmm_reg(cp);
+   struct x86_reg vertex_nr;
+
+   /* Linear runs keep the vertex number in EBX itself; indexed runs
+    * keep a pointer to the current element there.
+    */
+   if (linear)
+      vertex_nr = cp->idx_EBX;
+   else
+      vertex_nr = x86_deref(cp->idx_EBX);
+
+   /* Work out the source address in EAX, then use it as a memory
+    * operand.
+    */
+   get_src_ptr(cp, addr, vertex_nr, idx);
+   addr = x86_deref(addr);
+
+   /* From here on the register counts as the (dirty) shader input. */
+   aos_adopt_xmm_reg( cp, vec, TGSI_FILE_INPUT, idx, TRUE );
+
+   switch (fmt) {
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      emit_load_R32G32B32A32(cp, vec, addr);
+      break;
+
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      emit_load_R32G32B32(cp, vec, addr);
+      break;
+
+   case PIPE_FORMAT_R32G32_FLOAT:
+      emit_load_R32G32(cp, vec, addr);
+      break;
+
+   case PIPE_FORMAT_R32_FLOAT:
+      emit_load_R32(cp, vec, addr);
+      break;
+
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      emit_load_R8G8B8A8_UNORM(cp, vec, addr);
+      break;
+
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      /* Same loader, then exchange the x and z channels. */
+      emit_load_R8G8B8A8_UNORM(cp, vec, addr);
+      emit_swizzle(cp, vec, vec, SHUF(Z,Y,X,W));
+      break;
+
+   default:
+      ERROR(cp, "unhandled input format");
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
+{
+   /* Emit the loads for every vertex input named in the varient key.
+    * Stops and returns FALSE at the first input that cannot be loaded.
+    *
+    * NOTE(review): the newline is printed unconditionally here, whereas
+    * build_vertex_program() guards its equivalent with DISASSEM --
+    * confirm whether that is intended.
+    */
+   const unsigned nr_inputs = cp->vaos->base.key.nr_inputs;
+   unsigned attr = 0;
+
+   while (attr < nr_inputs) {
+      if (!load_input( cp, attr, linear ))
+         return FALSE;
+
+      cp->insn_counter++;
+      debug_printf("\n");
+      attr++;
+   }
+
+   return TRUE;
+}
+
+
+
+
+
+
+/* Store all four floats of dataXMM to dst_ptr.
+ */
+static void emit_store_R32G32B32A32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movups(cp->func, dst_ptr, dataXMM); /* unaligned 16-byte store */
+}
+/* Store x, y, z of dataXMM to dst_ptr (12 bytes).  The register is
+ * clobbered: afterwards it holds z in every channel.
+ */
+static void emit_store_R32G32B32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movlps(cp->func, dst_ptr, dataXMM); /* x, y -> bytes 0..7 */
+ sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
+ sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM); /* z -> bytes 8..11 */
+}
+/* Store x and y of dataXMM to dst_ptr (8 bytes).
+ */
+static void emit_store_R32G32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movlps(cp->func, dst_ptr, dataXMM); /* low 64 bits only */
+}
+/* Store only x of dataXMM to dst_ptr (4 bytes).
+ */
+static void emit_store_R32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movss(cp->func, dst_ptr, dataXMM); /* low 32 bits only */
+}
+
+
+/* Convert four floats to four unsigned bytes (scaled by 255) and store
+ * them to dst_ptr.  dataXMM is clobbered in the process.
+ */
+static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255)); /* scale by 255 */
+ sse2_cvtps2dq(cp->func, dataXMM, dataXMM); /* floats -> 32-bit integers */
+ sse2_packssdw(cp->func, dataXMM, dataXMM); /* 32-bit -> 16-bit, signed saturation */
+ sse2_packuswb(cp->func, dataXMM, dataXMM); /* 16-bit -> 8-bit, unsigned saturation */
+ sse_movss(cp->func, dst_ptr, dataXMM); /* write the 4 packed bytes */
+}
+
+
+
+
+
+static boolean emit_output( struct aos_compilation *cp,
+                            struct x86_reg ptr,
+                            struct x86_reg dataXMM,
+                            unsigned format )
+{
+   /* Emit the store of one output attribute to 'ptr' in the requested
+    * EMIT_* format.  dataXMM may be clobbered by the 3F and 4UB paths.
+    * Returns FALSE (and flags cp->error) for unknown formats.
+    */
+   switch (format) {
+   case EMIT_4F:
+      emit_store_R32G32B32A32(cp, ptr, dataXMM);
+      return TRUE;
+
+   case EMIT_3F:
+      emit_store_R32G32B32(cp, ptr, dataXMM);
+      return TRUE;
+
+   case EMIT_2F:
+      emit_store_R32G32(cp, ptr, dataXMM);
+      return TRUE;
+
+   case EMIT_1F:
+   case EMIT_1F_PSIZE:
+      emit_store_R32(cp, ptr, dataXMM);
+      return TRUE;
+
+   case EMIT_4UB:
+      /* The x and z channels are exchanged before packing to bytes. */
+      emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
+      return TRUE;
+
+   default:
+      ERROR(cp, "unhandled output format");
+      return FALSE;
+   }
+}
+
+
+
+boolean aos_emit_outputs( struct aos_compilation *cp )
+{
+   /* Emit the stores that write every output element of the varient
+    * key into the current vertex of the output buffer (ECX).  Returns
+    * FALSE at the first element whose format cannot be emitted.
+    *
+    * NOTE(review): the newline is printed unconditionally here, whereas
+    * build_vertex_program() guards its equivalent with DISASSEM --
+    * confirm whether that is intended.
+    */
+   unsigned n;
+
+   for (n = 0; n < cp->vaos->base.key.nr_outputs; n++) {
+      unsigned fmt = cp->vaos->base.key.element[n].out.format;
+      unsigned byte_offset = cp->vaos->base.key.element[n].out.offset;
+      unsigned vs_output = cp->vaos->base.key.element[n].out.vs_output;
+      struct x86_reg value;
+
+      /* Point size comes from the internal IMM_PSIZE slot, everything
+       * else from the shader's output register.
+       */
+      value = (fmt == EMIT_1F_PSIZE)
+         ? aos_get_internal_xmm( cp, IMM_PSIZE )
+         : aos_get_shader_reg( cp, TGSI_FILE_OUTPUT, vs_output );
+
+      /* The store helpers want the data in an xmm register. */
+      if (value.file != file_XMM) {
+         struct x86_reg scratch = aos_get_xmm_reg( cp );
+         sse_movups(cp->func, scratch, value);
+         value = scratch;
+      }
+
+      if (!emit_output( cp,
+                        x86_make_disp( cp->outbuf_ECX, byte_offset ),
+                        value,
+                        fmt ))
+         return FALSE;
+
+      aos_release_xmm_reg( cp, value.idx );
+
+      cp->insn_counter++;
+      debug_printf("\n");
+   }
+
+   return TRUE;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
new file mode 100644
index 0000000000..6a54917ae3
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
@@ -0,0 +1,323 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_config.h"
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi/exec/tgsi_exec.h"
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+#include "draw_vertex.h"
+
+#ifdef PIPE_ARCH_X86
+
+#include "rtasm/rtasm_x86sse.h"
+
+
+#define X87_CW_EXCEPTION_INV_OP (1<<0)
+#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
+#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
+#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
+#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
+#define X87_CW_EXCEPTION_PRECISION (1<<5)
+#define X87_CW_PRECISION_SINGLE (0<<8)
+#define X87_CW_PRECISION_RESERVED (1<<8)
+#define X87_CW_PRECISION_DOUBLE (2<<8)
+#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
+#define X87_CW_PRECISION_MASK (3<<8)
+#define X87_CW_ROUND_NEAREST (0<<10)
+#define X87_CW_ROUND_DOWN (1<<10)
+#define X87_CW_ROUND_UP (2<<10)
+#define X87_CW_ROUND_ZERO (3<<10)
+#define X87_CW_ROUND_MASK (3<<10)
+#define X87_CW_INFINITY (1<<12)
+
+
+/**
+ * Reference (table-free) evaluation of the LIT instruction.
+ *
+ * result = { 1, max(in.x, 0), in.x > 0 ? max(in.y, 0)^clamp(in.w) : 0, 1 }
+ *
+ * \param machine  unused here; present so the signature matches the
+ *                 per-slot LIT callbacks stored in machine->lit_info[]
+ * \param result   receives the four result components
+ * \param in       source vector: in[0] = diffuse dot product,
+ *                 in[1] = specular dot product, in[3] = exponent
+ * \param count    LIT slot index (unused here)
+ */
+void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
+                            float *result,
+                            const float *in,
+                            unsigned count )
+{
+   if (in[0] > 0)
+   {
+      if (in[1] <= 0.0)
+      {
+         /* A non-positive specular dot product is clamped to zero, so
+          * the specular term is zero, not one.
+          */
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = 0.0F;
+         result[3] = 1.0F;
+      }
+      else
+      {
+         /* The exponent is clamped to just inside (-128, 128). */
+         const float epsilon = 1.0F / 256.0F;
+         float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = powf(in[1], exponent);
+         result[3] = 1.0;
+      }
+   }
+   else
+   {
+      /* Unlit: both the diffuse and specular terms are zero. */
+      result[0] = 1.0F;
+      result[1] = 0.0;
+      result[2] = 0.0;
+      result[3] = 1.0F;
+   }
+}
+
+
+/**
+ * LIT evaluation using the shininess table bound to this LIT slot.
+ *
+ * The table is only valid for the exponent it was built with.  If the
+ * incoming exponent differs, the slot is switched permanently to
+ * aos_do_lit and this call falls back to powf().  Specular inputs above
+ * 1.0 are outside the table and also go through powf().
+ *
+ * \param machine  machine owning lit_info[] and the shine tables
+ * \param result   receives the four result components
+ * \param in       source vector: in[0] = diffuse dot product,
+ *                 in[1] = specular dot product, in[3] = exponent
+ * \param count    index of the LIT slot in machine->lit_info[]
+ */
+static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
+                                   float *result,
+                                   const float *in,
+                                   unsigned count )
+{
+   if (in[0] > 0)
+   {
+      if (in[1] <= 0.0)
+      {
+         /* A non-positive specular dot product is clamped to zero, so
+          * the specular term is zero, not one.
+          */
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = 0.0F;
+         result[3] = 1.0F;
+         return;
+      }
+
+      if (machine->lit_info[count].shine_tab->exponent != in[3]) {
+         /* Exponent is not constant for this slot: stop using tables. */
+         machine->lit_info[count].func = aos_do_lit;
+         goto no_luck;
+      }
+
+      if (in[1] <= 1.0)
+      {
+         /* Linear interpolation between adjacent entries; the table is
+          * sampled in steps of 1/256.
+          */
+         const float *tab = machine->lit_info[count].shine_tab->values;
+         float f = in[1] * 256;
+         int k = (int)f;
+         float frac = f - (float)k;
+
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
+         result[3] = 1.0;
+         return;
+      }
+
+   no_luck:
+      {
+         const float epsilon = 1.0F / 256.0F;
+         float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = powf(in[1], exponent);
+         result[3] = 1.0;
+      }
+   }
+   else
+   {
+      /* Unlit: both the diffuse and specular terms are zero. */
+      result[0] = 1.0F;
+      result[1] = 0.0;
+      result[2] = 0.0;
+      result[3] = 1.0F;
+   }
+}
+
+
+/**
+ * Fill a shininess table for one exponent.
+ *
+ * Entry 0 is zero; entries 1..257 hold (i/256)^exponent using the clamped
+ * exponent, or 1.0 when the clamped exponent is exactly zero.  The
+ * unclamped exponent is recorded in the table so later lookups can
+ * compare against the raw shader input.
+ */
+static void do_populate_lut( struct shine_tab *tab,
+                             float unclamped_exponent )
+{
+   const float step = 1.0F / 256.0F;
+   const float limit = 128.0F - step;
+   const float exponent = CLAMP(unclamped_exponent, -limit, limit);
+   const boolean flat = (exponent == 0);
+   unsigned idx;
+
+   tab->exponent = unclamped_exponent; /* for later comparison */
+   tab->values[0] = 0;
+
+   for (idx = 1; idx < 258; idx++) {
+      if (flat)
+         tab->values[idx] = 1.0;
+      else
+         tab->values[idx] = powf((float)idx * step, exponent);
+   }
+}
+
+
+
+
+/**
+ * First-use LIT callback: choose (or build) a shininess table for this
+ * LIT slot, install the matching fast path and evaluate the instruction.
+ *
+ * If a table already exists for exponent in[3] it is reused.  Otherwise
+ * the table with the oldest last_used stamp is rebuilt, unless even that
+ * one was used during the current 'now' period, in which case the slot
+ * falls back to aos_do_lit.
+ *
+ * \param machine  machine owning lit_info[] and shine_tab[]
+ * \param result   receives the four result components
+ * \param in       source vector; in[3] is the exponent
+ * \param count    index of the LIT slot in machine->lit_info[]
+ */
+static void PIPE_CDECL populate_lut( struct aos_machine *machine,
+                                     float *result,
+                                     const float *in,
+                                     unsigned count )
+{
+   unsigned i, tab;
+
+   /* Search for an existing table for this value.  Note that without
+    * static analysis we don't really know if in[3] will be constant,
+    * but it usually is...
+    */
+   for (tab = 0; tab < MAX_SHINE_TAB; tab++) {
+      if (machine->shine_tab[tab].exponent == in[3]) {
+         goto found;
+      }
+   }
+
+   /* No match: pick the least recently used table as the victim. */
+   for (tab = 0, i = 1; i < MAX_SHINE_TAB; i++) {
+      if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
+         tab = i;
+   }
+
+   if (machine->shine_tab[tab].last_used == machine->now) {
+      /* No unused tables (this is not a ffvertex program...).  Just
+       * call pow each time:
+       */
+      machine->lit_info[count].func = aos_do_lit;
+      machine->lit_info[count].func( machine, result, in, count );
+      return;
+   }
+   else {
+      do_populate_lut( &machine->shine_tab[tab], in[3] );
+   }
+
+ found:
+   machine->shine_tab[tab].last_used = machine->now;
+   machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
+   machine->lit_info[count].func = do_lit_lut;
+   machine->lit_info[count].func( machine, result, in, count );
+}
+
+
+/**
+ * Bind a new constant buffer to the machine.
+ *
+ * Every LIT slot is reset to populate_lut so that its next evaluation
+ * selects a shininess table again.
+ */
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+                                    const float (*constants)[4] )
+{
+   unsigned slot = 0;
+
+   machine->constants = constants;
+
+   while (slot < MAX_LIT_INFO) {
+      machine->lit_info[slot].func = populate_lut;
+      /* the 'now' stamp advances once per slot */
+      machine->now++;
+      slot++;
+   }
+}
+
+
+/**
+ * Copy the four-component viewport scale and translate vectors into the
+ * machine.
+ */
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+                                   const struct pipe_viewport_state *viewport )
+{
+   const size_t vec4_bytes = 4 * sizeof(float);
+
+   memcpy(machine->scale, viewport->scale, vec4_bytes);
+   memcpy(machine->translate, viewport->translate, vec4_bytes);
+}
+
+
+
+/**
+ * Release a machine; uses align_free() to pair with the align_malloc()
+ * in draw_vs_aos_machine().
+ */
+void draw_vs_aos_machine_destroy( struct aos_machine *machine )
+{
+ align_free(machine);
+}
+
+/**
+ * Allocate and initialise an AOS vertex shader machine.
+ *
+ * The machine is 16-byte aligned and zero-filled, then loaded with the
+ * internal immediate vectors, two x87 control words (round-to-nearest and
+ * round-down) and shininess tables pre-built for an exponent of 1.0.
+ *
+ * \return the new machine, or NULL if the allocation fails.  Release it
+ *         with draw_vs_aos_machine_destroy().
+ */
+struct aos_machine *draw_vs_aos_machine( void )
+{
+   const unsigned all_bits = 0xffffffff;
+   struct aos_machine *machine;
+   unsigned i;
+   float inv = 1.0f/255.0f;
+   float f255 = 255.0f;
+
+   machine = align_malloc(sizeof(struct aos_machine), 16);
+   if (!machine)
+      return NULL;
+
+   memset(machine, 0, sizeof(*machine));
+
+   ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
+   /* The last component of IMM_SWZ is an all-ones bit pattern rather than
+    * a float value.  Store it with memcpy: writing through a casted
+    * unsigned pointer would break the strict aliasing rules.
+    */
+   memcpy(&machine->internal[IMM_SWZ][3], &all_bits, sizeof(all_bits));
+
+   ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
+   ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
+   ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
+   ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
+   ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
+   ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
+
+
+   /* x87 control word: all six exception bits set, bit 6 set, extended
+    * precision, round to nearest.
+    */
+   machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
+                               X87_CW_EXCEPTION_DENORM_OP |
+                               X87_CW_EXCEPTION_ZERO_DIVIDE |
+                               X87_CW_EXCEPTION_OVERFLOW |
+                               X87_CW_EXCEPTION_UNDERFLOW |
+                               X87_CW_EXCEPTION_PRECISION |
+                               (1<<6) |
+                               X87_CW_ROUND_NEAREST |
+                               X87_CW_PRECISION_DOUBLE_EXT);
+
+   assert(machine->fpu_rnd_nearest == 0x37f);
+
+   /* Same control word, but rounding down. */
+   machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
+                               X87_CW_EXCEPTION_DENORM_OP |
+                               X87_CW_EXCEPTION_ZERO_DIVIDE |
+                               X87_CW_EXCEPTION_OVERFLOW |
+                               X87_CW_EXCEPTION_UNDERFLOW |
+                               X87_CW_EXCEPTION_PRECISION |
+                               (1<<6) |
+                               X87_CW_ROUND_DOWN |
+                               X87_CW_PRECISION_DOUBLE_EXT);
+
+   /* Give every shininess table valid contents from the start. */
+   for (i = 0; i < MAX_SHINE_TAB; i++)
+      do_populate_lut( &machine->shine_tab[i], 1.0f );
+
+   return machine;
+}
+
+#else
+
+/* Stub implementations used when PIPE_ARCH_X86 is not defined: the
+ * setters do nothing and no machine is ever created.
+ */
+
+/* No-op: there is no machine state to update in this configuration. */
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+ const struct pipe_viewport_state *viewport )
+{
+}
+
+/* No-op: there is no machine state to update in this configuration. */
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+ const float (*constants)[4] )
+{
+}
+
+/* No-op: draw_vs_aos_machine() below never allocates anything. */
+void draw_vs_aos_machine_destroy( struct aos_machine *machine )
+{
+}
+
+/* Always returns NULL in this configuration. */
+struct aos_machine *draw_vs_aos_machine( void )
+{
+ return NULL;
+}
+#endif
+
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 7a02f6334b..4501877efc 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -179,10 +179,12 @@ draw_create_vs_exec(struct draw_context *draw,
tgsi_scan_shader(state->tokens, &vs->base.info);
+ vs->base.draw = draw;
vs->base.prepare = vs_exec_prepare;
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
- vs->machine = &draw->machine;
+ vs->base.create_varient = draw_vs_varient_generic;
+ vs->machine = &draw->vs.machine;
return &vs->base;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 171da51dd5..621472ec7c 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -114,7 +114,9 @@ draw_create_vs_llvm(struct draw_context *draw,
tgsi_scan_shader(vs->base.state.tokens, &vs->base.info);
+ vs->base.draw = draw;
vs->base.prepare = vs_llvm_prepare;
+ vs->base.create_varient = draw_vs_varient_generic;
vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
vs->machine = &draw->machine;
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index e3f4e67472..c3189c707d 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -31,9 +31,11 @@
* Brian Paul
*/
+#include "pipe/p_config.h"
+
#include "draw_vs.h"
-#if defined(__i386__) || defined(__386__)
+#if defined(PIPE_ARCH_X86)
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
@@ -47,10 +49,8 @@
#include "tgsi/util/tgsi_parse.h"
#define SSE_MAX_VERTICES 4
-#define SSE_SWIZZLES 1
-#if SSE_SWIZZLES
-typedef void (XSTDCALL *codegen_function) (
+typedef void (PIPE_CDECL *codegen_function) (
const struct tgsi_exec_vector *input, /* 1 */
struct tgsi_exec_vector *output, /* 2 */
float (*constant)[4], /* 3 */
@@ -62,14 +62,6 @@ typedef void (XSTDCALL *codegen_function) (
float (*aos_output)[4], /* 9 */
uint num_outputs, /* 10 */
uint output_stride ); /* 11 */
-#else
-typedef void (XSTDCALL *codegen_function) (
- const struct tgsi_exec_vector *input,
- struct tgsi_exec_vector *output,
- float (*constant)[4],
- struct tgsi_exec_vector *temporary,
- float (*immediates)[4] );
-#endif
struct draw_sse_vertex_shader {
struct draw_vertex_shader base;
@@ -78,8 +70,6 @@ struct draw_sse_vertex_shader {
codegen_function func;
struct tgsi_exec_machine *machine;
-
- float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
};
@@ -111,14 +101,13 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
-#if SSE_SWIZZLES
/* run compiled shader
*/
shader->func(machine->Inputs,
machine->Outputs,
(float (*)[4])constants,
machine->Temps,
- shader->immediates,
+ (float (*)[4])shader->base.immediates,
input,
base->info.num_inputs,
input_stride,
@@ -128,43 +117,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
output = (float (*)[4])((char *)output + output_stride * max_vertices);
-#else
- unsigned int j, slot;
-
- /* Swizzle inputs.
- */
- for (j = 0; j < max_vertices; j++) {
- for (slot = 0; slot < base->info.num_inputs; slot++) {
- machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
- machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
- machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
- machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
- }
-
- input = (const float (*)[4])((const char *)input + input_stride);
- }
-
- /* run compiled shader
- */
- shader->func(machine->Inputs,
- machine->Outputs,
- (float (*)[4])constants,
- machine->Temps,
- shader->immediates);
-
- /* Unswizzle all output results.
- */
- for (j = 0; j < max_vertices; j++) {
- for (slot = 0; slot < base->info.num_outputs; slot++) {
- output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
- }
-
- output = (float (*)[4])((char *)output + output_stride);
- }
-#endif
}
}
@@ -178,6 +130,8 @@ vs_sse_delete( struct draw_vertex_shader *base )
x86_release_func( &shader->sse2_program );
+ align_free( (void *) shader->base.immediates );
+
FREE( (void*) shader->base.state.tokens );
FREE( shader );
}
@@ -203,15 +157,26 @@ draw_create_vs_sse(struct draw_context *draw,
tgsi_scan_shader(templ->tokens, &vs->base.info);
+ vs->base.draw = draw;
+ if (1)
+ vs->base.create_varient = draw_vs_varient_aos_sse;
+ else
+ vs->base.create_varient = draw_vs_varient_generic;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
- vs->machine = &draw->machine;
+
+ vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
+ sizeof(float), 16);
+
+ vs->machine = &draw->vs.machine;
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
- &vs->sse2_program, vs->immediates, SSE_SWIZZLES ))
+ &vs->sse2_program,
+ (float (*)[4])vs->base.immediates,
+ TRUE ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
@@ -222,7 +187,7 @@ draw_create_vs_sse(struct draw_context *draw,
return &vs->base;
fail:
- fprintf(stderr, "tgsi_emit_sse2() failed, falling back to interpreter\n");
+ debug_error("tgsi_emit_sse2() failed, falling back to interpreter\n");
x86_release_func( &vs->sse2_program );
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
new file mode 100644
index 0000000000..ad0b829afa
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -0,0 +1,321 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_vs.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+/* A first pass at incorporating vertex fetch/emit functionality into
+ * the vertex shader.
+ */
+struct draw_vs_varient_generic {
+ struct draw_vs_varient base;
+
+ /* NOTE(review): 'shader' is not assigned anywhere in the visible code;
+  * the functions below reach the shader through base.vs instead.
+  */
+ struct draw_vertex_shader *shader;
+ struct draw_context *draw;
+
+ /* Basic plan is to run these two translate functions before/after
+ * the vertex shader's existing run_linear() routine to simulate
+ * the inclusion of this functionality into the shader...
+ *
+ * Next will look at actually including it.
+ */
+ struct translate *fetch;
+ struct translate *emit;
+
+ /* Size in bytes of one vertex in the temporary buffer:
+  * MAX2(nr_inputs, num_outputs) vec4 float slots.
+  */
+ unsigned temp_vertex_stride;
+};
+
+
+
+
+
+/**
+ * Bind a vertex input buffer: the pointer and stride are passed straight
+ * to the varient's fetch translate object.
+ */
+static void vsvg_set_input( struct draw_vs_varient *varient,
+                            unsigned buffer,
+                            const void *ptr,
+                            unsigned stride )
+{
+   struct translate *fetch = ((struct draw_vs_varient_generic *)varient)->fetch;
+
+   fetch->set_buffer(fetch, buffer, ptr, stride);
+}
+
+
+/* Mainly for debug at this stage: divide each position by w, apply the
+ * viewport scale and translate to x/y/z, and leave 1/w in the w slot.
+ */
+static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
+                             unsigned count,
+                             void *output_buffer )
+{
+   const float *scale = vsvg->base.vs->draw->viewport.scale;
+   const float *trans = vsvg->base.vs->draw->viewport.translate;
+   const unsigned stride = vsvg->temp_vertex_stride;
+   char *vertex = (char *)output_buffer;
+   unsigned remaining;
+
+   /* Step to the position attribute inside the first vertex. */
+   vertex += vsvg->base.vs->position_output * 4 * sizeof(float);
+
+   for (remaining = count; remaining != 0; remaining--, vertex += stride) {
+      float *pos = (float *)vertex;
+      float rhw = 1.0f / pos[3];
+
+      pos[0] = pos[0] * rhw * scale[0] + trans[0];
+      pos[1] = pos[1] * rhw * scale[1] + trans[1];
+      pos[2] = pos[2] * rhw * scale[2] + trans[2];
+      pos[3] = rhw;
+   }
+}
+
+/* Apply the viewport scale and translate to the x/y/z of each vertex
+ * position; w is left untouched.
+ */
+static void do_viewport( struct draw_vs_varient_generic *vsvg,
+                         unsigned count,
+                         void *output_buffer )
+{
+   const float *scale = vsvg->base.vs->draw->viewport.scale;
+   const float *trans = vsvg->base.vs->draw->viewport.translate;
+   const unsigned stride = vsvg->temp_vertex_stride;
+   char *vertex = (char *)output_buffer;
+   unsigned remaining;
+
+   /* Step to the position attribute inside the first vertex. */
+   vertex += vsvg->base.vs->position_output * 4 * sizeof(float);
+
+   for (remaining = count; remaining != 0; remaining--, vertex += stride) {
+      float *pos = (float *)vertex;
+
+      pos[0] = pos[0] * scale[0] + trans[0];
+      pos[1] = pos[1] * scale[1] + trans[1];
+      pos[2] = pos[2] * scale[2] + trans[2];
+   }
+}
+
+
+/**
+ * Process an indexed list of vertices: fetch them into a temporary vec4
+ * buffer, run the shader in place, optionally apply the viewport, then
+ * emit the results into output_buffer.
+ *
+ * \param varient        the generic varient
+ * \param elts           vertex indices to fetch
+ * \param count          number of indices
+ * \param output_buffer  destination for the emitted vertices
+ *
+ * If the temporary buffer cannot be allocated the call returns without
+ * writing anything to output_buffer.
+ */
+static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
+                                      const unsigned *elts,
+                                      unsigned count,
+                                      void *output_buffer)
+{
+   struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+   unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
+   void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
+
+   if (0) debug_printf("%s %d \n", __FUNCTION__, count);
+
+   /* Every stage below dereferences the temporary buffer. */
+   if (temp_buffer == NULL)
+      return;
+
+   /* Want to do this in small batches for cache locality?
+    */
+
+   vsvg->fetch->run_elts( vsvg->fetch,
+                          elts,
+                          count,
+                          temp_buffer );
+
+   /* Input and output share the buffer: the shader runs in place. */
+   vsvg->base.vs->run_linear( vsvg->base.vs,
+                              temp_buffer,
+                              temp_buffer,
+                              (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
+                              count,
+                              temp_vertex_stride,
+                              temp_vertex_stride);
+
+
+   if (vsvg->base.key.clip) {
+      /* not really handling clipping, just do the rhw so we can
+       * see the results...
+       */
+      do_rhw_viewport( vsvg,
+                       count,
+                       temp_buffer );
+   }
+   else if (vsvg->base.key.viewport) {
+      do_viewport( vsvg,
+                   count,
+                   temp_buffer );
+   }
+
+
+   /* Emit buffer 0 is the shaded vertex data. */
+   vsvg->emit->set_buffer( vsvg->emit,
+                           0,
+                           temp_buffer,
+                           temp_vertex_stride );
+
+   /* Emit buffer 1 is the rasterizer point size; stride 0 means the
+    * same value is read for every vertex.
+    */
+   vsvg->emit->set_buffer( vsvg->emit,
+                           1,
+                           &vsvg->draw->rasterizer->point_size,
+                           0);
+
+   vsvg->emit->run( vsvg->emit,
+                    0, count,
+                    output_buffer );
+
+   FREE(temp_buffer);
+}
+
+
+/**
+ * Process a contiguous range of vertices: fetch them into a temporary
+ * vec4 buffer, run the shader in place, optionally apply the viewport,
+ * then emit the results into output_buffer.
+ *
+ * \param varient        the generic varient
+ * \param start          index of the first vertex to fetch
+ * \param count          number of vertices
+ * \param output_buffer  destination for the emitted vertices
+ *
+ * If the temporary buffer cannot be allocated the call returns without
+ * writing anything to output_buffer.
+ */
+static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
+                                        unsigned start,
+                                        unsigned count,
+                                        void *output_buffer )
+{
+   struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+   unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
+   void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
+
+   if (0) debug_printf("%s %d %d (sz %d, %d)\n", __FUNCTION__, start, count,
+                       vsvg->base.key.output_stride,
+                       temp_vertex_stride);
+
+   /* Every stage below dereferences the temporary buffer. */
+   if (temp_buffer == NULL)
+      return;
+
+   vsvg->fetch->run( vsvg->fetch,
+                     start,
+                     count,
+                     temp_buffer );
+
+   /* Input and output share the buffer: the shader runs in place. */
+   vsvg->base.vs->run_linear( vsvg->base.vs,
+                              temp_buffer,
+                              temp_buffer,
+                              (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
+                              count,
+                              temp_vertex_stride,
+                              temp_vertex_stride);
+
+   if (vsvg->base.key.clip) {
+      /* not really handling clipping, just do the rhw so we can
+       * see the results...
+       */
+      do_rhw_viewport( vsvg,
+                       count,
+                       temp_buffer );
+   }
+   else if (vsvg->base.key.viewport) {
+      do_viewport( vsvg,
+                   count,
+                   temp_buffer );
+   }
+
+   /* Emit buffer 0 is the shaded vertex data. */
+   vsvg->emit->set_buffer( vsvg->emit,
+                           0,
+                           temp_buffer,
+                           temp_vertex_stride );
+
+   /* Emit buffer 1 is the rasterizer point size; stride 0 means the
+    * same value is read for every vertex.
+    */
+   vsvg->emit->set_buffer( vsvg->emit,
+                           1,
+                           &vsvg->draw->rasterizer->point_size,
+                           0);
+
+   vsvg->emit->run( vsvg->emit,
+                    0, count,
+                    output_buffer );
+
+   FREE(temp_buffer);
+}
+
+
+
+
+
+/* Free the varient structure only; the fetch and emit translate objects
+ * it points to are not released here.
+ */
+static void vsvg_destroy( struct draw_vs_varient *varient )
+{
+ FREE(varient);
+}
+
+
+/**
+ * Create a generic varient: the shader's run_linear() wrapped between a
+ * free-standing fetch translate and a free-standing emit translate.
+ *
+ * \param vs   shader the varient is built for
+ * \param key  description of the input elements, output elements and
+ *             output stride
+ *
+ * \return the new varient, or NULL if it could not be allocated or if
+ *         either translate object could not be obtained.
+ */
+struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
+                                                 const struct draw_vs_varient_key *key )
+{
+   unsigned i;
+   struct translate_key fetch, emit;
+
+   struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic );
+   if (vsvg == NULL)
+      return NULL;
+
+   vsvg->base.key = *key;
+   vsvg->base.vs = vs;
+   vsvg->base.set_input = vsvg_set_input;
+   vsvg->base.run_elts = vsvg_run_elts;
+   vsvg->base.run_linear = vsvg_run_linear;
+   vsvg->base.destroy = vsvg_destroy;
+
+   vsvg->draw = vs->draw;
+
+   /* The shader runs in place, so one temporary vertex must be able to
+    * hold whichever is larger: all inputs or all outputs, as vec4 floats.
+    */
+   vsvg->temp_vertex_stride = MAX2(key->nr_inputs,
+                                   vsvg->base.vs->info.num_outputs) * 4 * sizeof(float);
+
+   /* Build free-standing fetch and emit functions:
+    */
+   fetch.nr_elements = key->nr_inputs;
+   fetch.output_stride = vsvg->temp_vertex_stride;
+   for (i = 0; i < key->nr_inputs; i++) {
+      fetch.element[i].input_format = key->element[i].in.format;
+      fetch.element[i].input_buffer = key->element[i].in.buffer;
+      fetch.element[i].input_offset = key->element[i].in.offset;
+      fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      fetch.element[i].output_offset = i * 4 * sizeof(float);
+      assert(fetch.element[i].output_offset < fetch.output_stride);
+   }
+
+
+   emit.nr_elements = key->nr_outputs;
+   emit.output_stride = key->output_stride;
+   for (i = 0; i < key->nr_outputs; i++) {
+      if (key->element[i].out.format != EMIT_1F_PSIZE)
+      {
+         /* Regular attribute: read the vec4 shader output from buffer 0. */
+         emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+         emit.element[i].input_buffer = 0;
+         emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
+         emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
+         emit.element[i].output_offset = key->element[i].out.offset;
+         assert(emit.element[i].input_offset < fetch.output_stride);
+      }
+      else {
+         /* Point size: a single float read from buffer 1. */
+         emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
+         emit.element[i].input_buffer = 1;
+         emit.element[i].input_offset = 0;
+         emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
+         emit.element[i].output_offset = key->element[i].out.offset;
+      }
+   }
+
+   vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );
+   vsvg->emit = draw_vs_get_emit( vs->draw, &emit );
+
+   /* The run functions dereference both translate objects without
+    * checking them, so fail here instead of returning a varient that
+    * cannot run.
+    */
+   if (vsvg->fetch == NULL || vsvg->emit == NULL) {
+      FREE(vsvg);
+      return NULL;
+   }
+
+   return &vsvg->base;
+}
+
+
+
+
+
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 9695358ab8..98014bdaa1 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -96,10 +96,8 @@ translate_declaration(struct gallivm_ir *prog,
unsigned first, last, mask;
uint interp_method;
- assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE);
-
- first = decl->u.DeclarationRange.First;
- last = decl->u.DeclarationRange.Last;
+ first = decl->DeclarationRange.First;
+ last = decl->DeclarationRange.Last;
mask = decl->Declaration.UsageMask;
/* Do not touch WPOS.xy */
@@ -113,7 +111,7 @@ translate_declaration(struct gallivm_ir *prog,
}
}
- interp_method = decl->Interpolation.Interpolate;
+ interp_method = decl->Declaration.Interpolate;
if (mask == TGSI_WRITEMASK_XYZW) {
unsigned i, j;
@@ -153,7 +151,7 @@ translate_declarationir(struct gallivm_ir *,
struct tgsi_full_declaration *)
{
if (decl->Declaration.File == TGSI_FILE_ADDRESS) {
- int idx = decl->u.DeclarationRange.First;
+ int idx = decl->DeclarationRange.First;
storage->addAddress(idx);
}
}
diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile
index ff09011b66..1d9b036c07 100644
--- a/src/gallium/auxiliary/pipebuffer/Makefile
+++ b/src/gallium/auxiliary/pipebuffer/Makefile
@@ -6,6 +6,7 @@ LIBNAME = pipebuffer
C_SOURCES = \
pb_buffer_fenced.c \
pb_buffer_malloc.c \
+ pb_bufmgr_alt.c \
pb_bufmgr_cache.c \
pb_bufmgr_fenced.c \
pb_bufmgr_mm.c \
diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript
index 9db0c0eae3..e52177bc79 100644
--- a/src/gallium/auxiliary/pipebuffer/SConscript
+++ b/src/gallium/auxiliary/pipebuffer/SConscript
@@ -5,6 +5,7 @@ pipebuffer = env.ConvenienceLibrary(
source = [
'pb_buffer_fenced.c',
'pb_buffer_malloc.c',
+ 'pb_bufmgr_alt.c',
'pb_bufmgr_cache.c',
'pb_bufmgr_fenced.c',
'pb_bufmgr_mm.c',
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index 9e8244f909..e90d2e5623 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -30,13 +30,14 @@
* Implementation of malloc-based buffers to store data that can't be processed
* by the hardware.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pb_buffer.h"
+#include "pb_bufmgr.h"
struct malloc_buffer
@@ -119,9 +120,39 @@ pb_malloc_buffer_create(size_t size,
buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment);
if(!buf->data) {
- align_free(buf);
+ FREE(buf);
return NULL;
}
return &buf->base;
}
+
+
+/* pb_manager-style create_buffer entry point: the manager argument is
+ * ignored and the request is forwarded to pb_malloc_buffer_create().
+ */
+static struct pb_buffer *
+pb_malloc_buffer_create_buffer(struct pb_manager *mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ return pb_malloc_buffer_create(size, desc);
+}
+
+
+/* Nothing to release: the only malloc manager is the static
+ * pb_malloc_bufmgr object defined in this file.
+ */
+static void
+pb_malloc_bufmgr_destroy(struct pb_manager *mgr)
+{
+ /* No-op */
+}
+
+
+/* The single, statically allocated malloc buffer manager.
+ * NOTE(review): the initializers are positional, so they rely on
+ * struct pb_manager declaring create_buffer before destroy -- confirm
+ * against pb_bufmgr.h.
+ */
+static struct pb_manager
+pb_malloc_bufmgr = {
+ pb_malloc_buffer_create_buffer,
+ pb_malloc_bufmgr_destroy
+};
+
+
+/* Return the malloc buffer manager.  No allocation takes place: every
+ * call returns the address of the same static object.
+ */
+struct pb_manager *
+pb_malloc_bufmgr_create(void)
+{
+ return &pb_malloc_bufmgr;
+}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index f6cc7a525b..00279f7010 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -79,6 +79,15 @@ struct pb_manager
};
+/**
+ * Malloc buffer provider.
+ *
+ * Simple wrapper around pb_malloc_buffer_create for convenience.
+ */
+struct pb_manager *
+pb_malloc_bufmgr_create(void);
+
+
/**
* Static buffer pool sub-allocator.
*
@@ -162,6 +171,11 @@ fenced_bufmgr_create(struct pb_manager *provider,
struct pipe_winsys *winsys);
+struct pb_manager *
+pb_alt_manager_create(struct pb_manager *provider1,
+ struct pb_manager *provider2);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
new file mode 100644
index 0000000000..702bef1c04
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Allocate buffers from two alternative buffer providers.
+ *
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+/* Buffer manager that allocates from one of two underlying providers. */
+struct pb_alt_manager
+{
+ /* First member, so a pb_manager pointer can be cast back to this type. */
+ struct pb_manager base;
+
+ /* Tried first for every allocation. */
+ struct pb_manager *provider1;
+ /* Tried only when provider1 fails to return a buffer. */
+ struct pb_manager *provider2;
+};
+
+
+/* Cast helper: converts a generic manager pointer to pb_alt_manager.
+ * Only non-NULL is asserted; the actual type is not checked.
+ */
+static INLINE struct pb_alt_manager *
+pb_alt_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct pb_alt_manager *)mgr;
+}
+
+
+/**
+ * Allocate a buffer from the first provider, falling back to the second
+ * one when the first returns NULL.
+ *
+ * \return the buffer, or NULL if both providers fail.
+ */
+static struct pb_buffer *
+pb_alt_manager_create_buffer(struct pb_manager *_mgr,
+                             size_t size,
+                             const struct pb_desc *desc)
+{
+   struct pb_alt_manager *alt = pb_alt_manager(_mgr);
+   struct pb_buffer *result;
+
+   result = alt->provider1->create_buffer(alt->provider1, size, desc);
+   if(!result)
+      result = alt->provider2->create_buffer(alt->provider2, size, desc);
+
+   return result;
+}
+
+
+/* Free the alternative manager itself; the two providers are not
+ * destroyed here.
+ */
+static void
+pb_alt_manager_destroy(struct pb_manager *mgr)
+{
+ FREE(mgr);
+}
+
+
+/**
+ * Create a manager that allocates from provider1 and falls back to
+ * provider2.
+ *
+ * \return the new manager, or NULL if it could not be allocated.
+ */
+struct pb_manager *
+pb_alt_manager_create(struct pb_manager *provider1,
+                      struct pb_manager *provider2)
+{
+   struct pb_alt_manager *alt = CALLOC_STRUCT(pb_alt_manager);
+
+   if (alt) {
+      alt->provider1 = provider1;
+      alt->provider2 = provider2;
+      alt->base.create_buffer = pb_alt_manager_create_buffer;
+      alt->base.destroy = pb_alt_manager_destroy;
+      return &alt->base;
+   }
+
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 4bd3f94a6c..f1a457dde4 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -207,8 +207,11 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
size_t size,
const struct pb_desc *desc)
{
- /* TODO: be more lenient with size */
- if(buf->base.base.size != size)
+ if(buf->base.base.size < size)
+ return FALSE;
+
+ /* be lenient with size */
+ if(buf->base.base.size >= 2*size)
return FALSE;
if(!pb_check_alignment(desc->alignment, buf->base.base.alignment))
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index 66256f3fa7..0a1e8c83b1 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -184,7 +184,6 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
if(!mm_buf->block) {
- assert(0);
FREE(mm_buf);
_glthread_UNLOCK_MUTEX(mm->mutex);
return NULL;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 45ba158a4d..b9dff09804 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -312,8 +312,8 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr,
struct list_head *list;
/* check size */
- assert(size == mgr->bufSize);
- if(size != mgr->bufSize)
+ assert(size <= mgr->bufSize);
+ if(size > mgr->bufSize)
return NULL;
/* check if we can provide the requested alignment */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index f01e12faa0..5499018b21 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -47,7 +47,7 @@ static boolean rtasm_sse_enabled(void)
int rtasm_cpu_has_sse(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#if defined(PIPE_ARCH_X86)
return rtasm_sse_enabled();
#else
return 0;
@@ -57,7 +57,7 @@ int rtasm_cpu_has_sse(void)
int rtasm_cpu_has_sse2(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#if defined(PIPE_ARCH_X86)
return rtasm_sse_enabled();
#else
return 0;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 4e036d9032..f4ca282dd9 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -21,7 +21,9 @@
*
**************************************************************************/
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_X86)
#include "pipe/p_compiler.h"
#include "pipe/p_debug.h"
@@ -36,11 +38,8 @@
#define DUMP_SSE 0
-#if DUMP_SSE
-static void
-_print_reg(
- struct x86_reg reg )
+void x86_print_reg( struct x86_reg reg )
{
if (reg.mod != mod_REG)
debug_printf( "[" );
@@ -77,6 +76,7 @@ _print_reg(
debug_printf( "]" );
}
+#if DUMP_SSE
#define DUMP_START() debug_printf( "\n" )
#define DUMP_END() debug_printf( "\n" )
@@ -87,7 +87,7 @@ _print_reg(
foo++; \
if (*foo) \
foo++; \
- debug_printf( "\n% 15s ", foo ); \
+ debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \
} while (0)
#define DUMP_I( I ) do { \
@@ -97,27 +97,27 @@ _print_reg(
#define DUMP_R( R0 ) do { \
DUMP(); \
- _print_reg( R0 ); \
+ x86_print_reg( R0 ); \
} while( 0 )
#define DUMP_RR( R0, R1 ) do { \
DUMP(); \
- _print_reg( R0 ); \
+ x86_print_reg( R0 ); \
debug_printf( ", " ); \
- _print_reg( R1 ); \
+ x86_print_reg( R1 ); \
} while( 0 )
#define DUMP_RI( R0, I ) do { \
DUMP(); \
- _print_reg( R0 ); \
+ x86_print_reg( R0 ); \
debug_printf( ", %u", I ); \
} while( 0 )
#define DUMP_RRI( R0, R1, I ) do { \
DUMP(); \
- _print_reg( R0 ); \
+ x86_print_reg( R0 ); \
debug_printf( ", " ); \
- _print_reg( R1 ); \
+ x86_print_reg( R1 ); \
debug_printf( ", %u", I ); \
} while( 0 )
@@ -220,6 +220,8 @@ static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1
/* Build a modRM byte + possible displacement. No treatment of SIB
* indexing. BZZT - no way to encode an absolute address.
+ *
+ * This is the "/r" field in the x86 manuals...
*/
static void emit_modrm( struct x86_function *p,
struct x86_reg reg,
@@ -258,7 +260,8 @@ static void emit_modrm( struct x86_function *p,
}
}
-
+/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes.
+ */
static void emit_modrm_noreg( struct x86_function *p,
unsigned op,
struct x86_reg regmem )
@@ -327,7 +330,7 @@ struct x86_reg x86_make_disp( struct x86_reg reg,
else
reg.disp += disp;
- if (reg.disp == 0)
+ if (reg.disp == 0 && reg.idx != reg_BP)
reg.mod = mod_INDIRECT;
else if (reg.disp <= 127 && reg.disp >= -128)
reg.mod = mod_DISP8;
@@ -367,8 +370,7 @@ void x86_jcc( struct x86_function *p,
DUMP_I(cc);
if (offset < 0) {
- int amt = p->csr - p->store;
- assert(amt > -offset);
+ assert(p->csr - p->store > -offset);
}
if (offset <= 127 && offset >= -128) {
@@ -445,6 +447,16 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
emit_1i(p, imm);
}
+void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm )
+{
+ DUMP_RI( dst, imm );
+ assert(dst.mod == mod_REG);
+ emit_1ub(p, 0x80);
+ emit_modrm_noreg(p, 0, dst);
+ emit_1ub(p, imm);
+}
+
+
void x86_push( struct x86_function *p,
struct x86_reg reg )
{
@@ -461,6 +473,17 @@ void x86_push( struct x86_function *p,
p->stack_offset += 4;
}
+void x86_push_imm32( struct x86_function *p,
+ int imm32 )
+{
+ DUMP_I( imm32 );
+ emit_1ub(p, 0x68);
+ emit_1i(p, imm32);
+
+ p->stack_offset += 4;
+}
+
+
void x86_pop( struct x86_function *p,
struct x86_reg reg )
{
@@ -865,7 +888,7 @@ void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg sr
void sse_cmpps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src,
- unsigned char cc)
+ enum sse_cc cc)
{
DUMP_RRI( dst, src, cc );
emit_2ub(p, X86_TWOB, 0xC2);
@@ -988,6 +1011,24 @@ void sse2_movd( struct x86_function *p,
/***********************************************************************
* x87 instructions
*/
+static void note_x87_pop( struct x86_function *p )
+{
+ p->x87_stack--;
+ assert(p->x87_stack >= 0);
+}
+
+static void note_x87_push( struct x86_function *p )
+{
+ p->x87_stack++;
+ assert(p->x87_stack <= 7);
+}
+
+void x87_assert_stack_empty( struct x86_function *p )
+{
+ assert (p->x87_stack == 0);
+}
+
+
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
@@ -1000,6 +1041,7 @@ void x87_fistp( struct x86_function *p, struct x86_reg dst )
DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 3, dst);
+ note_x87_pop(p);
}
void x87_fild( struct x86_function *p, struct x86_reg arg )
@@ -1007,12 +1049,14 @@ void x87_fild( struct x86_function *p, struct x86_reg arg )
DUMP_R( arg );
emit_1ub(p, 0xdf);
emit_modrm_noreg(p, 0, arg);
+ note_x87_push(p);
}
void x87_fldz( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xee);
+ note_x87_push(p);
}
@@ -1029,18 +1073,21 @@ void x87_fld1( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xe8);
+ note_x87_push(p);
}
void x87_fldl2e( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xea);
+ note_x87_push(p);
}
void x87_fldln2( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xed);
+ note_x87_push(p);
}
void x87_fwait( struct x86_function *p )
@@ -1061,6 +1108,49 @@ void x87_fclex( struct x86_function *p )
x87_fnclex(p);
}
+void x87_fcmovb( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xda, 0xc0+arg.idx);
+}
+
+void x87_fcmove( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xda, 0xc8+arg.idx);
+}
+
+void x87_fcmovbe( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xda, 0xd0+arg.idx);
+}
+
+void x87_fcmovnb( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xdb, 0xc0+arg.idx);
+}
+
+void x87_fcmovne( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xdb, 0xc8+arg.idx);
+}
+
+void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xdb, 0xd0+arg.idx);
+}
+
+
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
unsigned char dst0ub0,
@@ -1148,6 +1238,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc8+dst.idx);
+ note_x87_pop(p);
}
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
@@ -1156,6 +1247,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe8+dst.idx);
+ note_x87_pop(p);
}
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
@@ -1164,6 +1256,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe0+dst.idx);
+ note_x87_pop(p);
}
void x87_faddp( struct x86_function *p, struct x86_reg dst )
@@ -1172,6 +1265,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc0+dst.idx);
+ note_x87_pop(p);
}
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
@@ -1180,6 +1274,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf8+dst.idx);
+ note_x87_pop(p);
}
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
@@ -1188,6 +1283,13 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf0+dst.idx);
+ note_x87_pop(p);
+}
+
+void x87_ftst( struct x86_function *p )
+{
+ DUMP();
+ emit_2ub(p, 0xd9, 0xe4);
}
void x87_fucom( struct x86_function *p, struct x86_reg arg )
@@ -1202,12 +1304,15 @@ void x87_fucomp( struct x86_function *p, struct x86_reg arg )
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe8+arg.idx);
+ note_x87_pop(p);
}
void x87_fucompp( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xda, 0xe9);
+ note_x87_pop(p); /* pop twice */
+ note_x87_pop(p); /* pop twice */
}
void x87_fxch( struct x86_function *p, struct x86_reg arg )
@@ -1289,6 +1394,7 @@ void x87_fyl2x( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf1);
+ note_x87_pop(p);
}
/* st1 = st1 * log2(st0 + 1.0);
@@ -1300,6 +1406,7 @@ void x87_fyl2xp1( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf9);
+ note_x87_pop(p);
}
@@ -1312,6 +1419,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg )
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 0, arg);
}
+ note_x87_push(p);
}
void x87_fst( struct x86_function *p, struct x86_reg dst )
@@ -1334,8 +1442,15 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst )
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 3, dst);
}
+ note_x87_pop(p);
}
+void x87_fpop( struct x86_function *p )
+{
+ x87_fstp( p, x86_make_reg( file_x87, 0 ));
+}
+
+
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
@@ -1347,6 +1462,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst )
}
}
+
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
@@ -1356,6 +1472,20 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst )
emit_1ub(p, 0xd8);
emit_modrm_noreg(p, 3, dst);
}
+ note_x87_pop(p);
+}
+
+void x87_fcomi( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ emit_2ub(p, 0xdb, 0xf0+arg.idx);
+}
+
+void x87_fcomip( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ emit_2ub(p, 0xdb, 0xf0+arg.idx);
+ note_x87_pop(p);
}
@@ -1374,6 +1504,17 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
}
+void x87_fnstcw( struct x86_function *p, struct x86_reg dst )
+{
+ DUMP_R( dst );
+ assert(dst.file == file_REG32);
+
+ emit_1ub(p, 0x9b); /* WAIT -- needed? */
+ emit_1ub(p, 0xd9);
+ emit_modrm_noreg(p, 7, dst);
+}
+
+
/***********************************************************************
@@ -1442,6 +1583,21 @@ void mmx_movq( struct x86_function *p,
*/
+void x86_cdecl_caller_push_regs( struct x86_function *p )
+{
+ x86_push(p, x86_make_reg(file_REG32, reg_AX));
+ x86_push(p, x86_make_reg(file_REG32, reg_CX));
+ x86_push(p, x86_make_reg(file_REG32, reg_DX));
+}
+
+void x86_cdecl_caller_pop_regs( struct x86_function *p )
+{
+ x86_pop(p, x86_make_reg(file_REG32, reg_DX));
+ x86_pop(p, x86_make_reg(file_REG32, reg_CX));
+ x86_pop(p, x86_make_reg(file_REG32, reg_AX));
+}
+
+
/* Retreive a reference to one of the function arguments, taking into
* account any push/pop activity:
*/
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index eacaeeaf6f..af94577aab 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -24,7 +24,9 @@
#ifndef _RTASM_X86SSE_H_
#define _RTASM_X86SSE_H_
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_X86)
/* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module
@@ -41,10 +43,12 @@ struct x86_function {
unsigned size;
unsigned char *store;
unsigned char *csr;
- unsigned stack_offset;
- int need_emms;
+
+ unsigned stack_offset:16;
+ unsigned need_emms:8;
+ int x87_stack:8;
+
unsigned char error_overflow[4];
- const char *fn;
};
enum x86_reg_file {
@@ -107,6 +111,9 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size );
void x86_release_func( struct x86_function *p );
void (*x86_get_func( struct x86_function *p ))( void );
+/* Debugging:
+ */
+void x86_print_reg( struct x86_reg reg );
/* Create and manipulate registers and regmem values:
@@ -150,6 +157,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg);
* I load the immediate into general purpose register and use it.
*/
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm );
/* Macro for sse_shufps() and sse2_pshufd():
@@ -183,7 +191,7 @@ void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
- unsigned char cc );
+ enum sse_cc cc );
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -220,6 +228,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
+void x86_push_imm32( struct x86_function *p, int imm );
void x86_ret( struct x86_function *p );
void x86_retw( struct x86_function *p, unsigned short imm );
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -227,13 +236,27 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
+
+void x86_cdecl_caller_push_regs( struct x86_function *p );
+void x86_cdecl_caller_pop_regs( struct x86_function *p );
+
+void x87_assert_stack_empty( struct x86_function *p );
+
void x87_f2xm1( struct x86_function *p );
void x87_fabs( struct x86_function *p );
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_faddp( struct x86_function *p, struct x86_reg dst );
void x87_fchs( struct x86_function *p );
void x87_fclex( struct x86_function *p );
+void x87_fcmovb( struct x86_function *p, struct x86_reg src );
+void x87_fcmovbe( struct x86_function *p, struct x86_reg src );
+void x87_fcmove( struct x86_function *p, struct x86_reg src );
+void x87_fcmovnb( struct x86_function *p, struct x86_reg src );
+void x87_fcmovnbe( struct x86_function *p, struct x86_reg src );
+void x87_fcmovne( struct x86_function *p, struct x86_reg src );
void x87_fcom( struct x86_function *p, struct x86_reg dst );
+void x87_fcomi( struct x86_function *p, struct x86_reg dst );
+void x87_fcomip( struct x86_function *p, struct x86_reg dst );
void x87_fcomp( struct x86_function *p, struct x86_reg dst );
void x87_fcos( struct x86_function *p );
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
@@ -253,6 +276,7 @@ void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fmulp( struct x86_function *p, struct x86_reg dst );
void x87_fnclex( struct x86_function *p );
void x87_fprndint( struct x86_function *p );
+void x87_fpop( struct x86_function *p );
void x87_fscale( struct x86_function *p );
void x87_fsin( struct x86_function *p );
void x87_fsincos( struct x86_function *p );
@@ -263,11 +287,13 @@ void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubp( struct x86_function *p, struct x86_reg dst );
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
+void x87_ftst( struct x86_function *p );
void x87_fxch( struct x86_function *p, struct x86_reg dst );
void x87_fxtract( struct x86_function *p );
void x87_fyl2x( struct x86_function *p );
void x87_fyl2xp1( struct x86_function *p );
void x87_fwait( struct x86_function *p );
+void x87_fnstcw( struct x86_function *p, struct x86_reg dst );
void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
void x87_fucompp( struct x86_function *p );
void x87_fucomp( struct x86_function *p, struct x86_reg arg );
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 826b432f09..13b8c2e5bf 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -165,9 +165,17 @@ tgsi_exec_machine_bind_shader(
declarations = (struct tgsi_full_declaration *)
MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
+ if (!declarations) {
+ return;
+ }
+
instructions = (struct tgsi_full_instruction *)
MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
+ if (!instructions) {
+ FREE( declarations );
+ return;
+ }
while( !tgsi_parse_end_of_tokens( &parse ) ) {
uint pointer = parse.Position;
@@ -1411,13 +1419,11 @@ exec_declaration(
unsigned first, last, mask;
eval_coef_func eval;
- assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
-
- first = decl->u.DeclarationRange.First;
- last = decl->u.DeclarationRange.Last;
+ first = decl->DeclarationRange.First;
+ last = decl->DeclarationRange.Last;
mask = decl->Declaration.UsageMask;
- switch( decl->Interpolation.Interpolate ) {
+ switch( decl->Declaration.Interpolate ) {
case TGSI_INTERPOLATE_CONSTANT:
eval = eval_constant_coef;
break;
@@ -1479,7 +1485,7 @@ exec_instruction(
break;
case TGSI_OPCODE_MOV:
- /* TGSI_OPCODE_SWZ */
+ case TGSI_OPCODE_SWZ:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
STORE( &r[0], 0, chan_index );
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 8018bd7fa4..cdbdf5c882 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -194,22 +194,12 @@ get_coef(
}
-#ifdef WIN32
-static void
-emit_retw(
- struct x86_function *func,
- unsigned size )
-{
- x86_retw( func, size );
-}
-#else
static void
emit_ret(
struct x86_function *func )
{
x86_ret( func );
}
-#endif
/**
@@ -475,7 +465,7 @@ static void
emit_func_call_dst(
struct x86_function *func,
unsigned xmm_dst,
- void (*code)() )
+ void (PIPE_CDECL *code)() )
{
sse_movaps(
func,
@@ -496,9 +486,7 @@ emit_func_call_dst(
x86_push( func, ecx );
x86_mov_reg_imm( func, ecx, (unsigned long) code );
x86_call( func, ecx );
-#ifndef WIN32
x86_pop(func, ecx );
-#endif
}
@@ -516,7 +504,7 @@ emit_func_call_dst_src(
struct x86_function *func,
unsigned xmm_dst,
unsigned xmm_src,
- void (*code)() )
+ void (PIPE_CDECL *code)() )
{
sse_movaps(
func,
@@ -558,7 +546,7 @@ emit_add(
make_xmm( xmm_src ) );
}
-static void XSTDCALL
+static void PIPE_CDECL
cos4f(
float *store )
{
@@ -581,7 +569,7 @@ emit_cos(
cos4f );
}
-static void XSTDCALL
+static void PIPE_CDECL
ex24f(
float *store )
{
@@ -615,7 +603,7 @@ emit_f2it(
make_xmm( xmm ) );
}
-static void XSTDCALL
+static void PIPE_CDECL
flr4f(
float *store )
{
@@ -638,7 +626,7 @@ emit_flr(
flr4f );
}
-static void XSTDCALL
+static void PIPE_CDECL
frc4f(
float *store )
{
@@ -661,7 +649,7 @@ emit_frc(
frc4f );
}
-static void XSTDCALL
+static void PIPE_CDECL
lg24f(
float *store )
{
@@ -720,7 +708,7 @@ emit_neg(
TGSI_EXEC_TEMP_80000000_C ) );
}
-static void XSTDCALL
+static void PIPE_CDECL
pow4f(
float *store )
{
@@ -820,7 +808,7 @@ emit_setsign(
TGSI_EXEC_TEMP_80000000_C ) );
}
-static void XSTDCALL
+static void PIPE_CDECL
sin4f(
float *store )
{
@@ -1190,7 +1178,7 @@ emit_instruction(
break;
case TGSI_OPCODE_MOV:
- /* TGSI_OPCODE_SWZ */
+ case TGSI_OPCODE_SWZ:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
STORE( func, *inst, 0, 0, chan_index );
@@ -1736,11 +1724,7 @@ emit_instruction(
break;
case TGSI_OPCODE_RET:
-#ifdef WIN32
- emit_retw( func, 16 );
-#else
emit_ret( func );
-#endif
break;
case TGSI_OPCODE_END:
@@ -1923,16 +1907,14 @@ emit_declaration(
unsigned first, last, mask;
unsigned i, j;
- assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
-
- first = decl->u.DeclarationRange.First;
- last = decl->u.DeclarationRange.Last;
+ first = decl->DeclarationRange.First;
+ last = decl->DeclarationRange.Last;
mask = decl->Declaration.UsageMask;
for( i = first; i <= last; i++ ) {
for( j = 0; j < NUM_CHANNELS; j++ ) {
if( mask & (1 << j) ) {
- switch( decl->Interpolation.Interpolate ) {
+ switch( decl->Declaration.Interpolate ) {
case TGSI_INTERPOLATE_CONSTANT:
emit_coef_a0( func, 0, i, j );
emit_inputs( func, 0, i, j );
@@ -2283,11 +2265,7 @@ tgsi_emit_sse2(
func,
get_immediate_base() );
-#ifdef WIN32
- emit_retw( func, 16 );
-#else
emit_ret( func );
-#endif
tgsi_parse_free( &parse );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
index 9c883ab704..18e44b38c2 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
@@ -90,9 +90,8 @@ tgsi_default_declaration( void )
declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
declaration.Size = 1;
declaration.File = TGSI_FILE_NULL;
- declaration.Declare = TGSI_DECLARE_RANGE;
declaration.UsageMask = TGSI_WRITEMASK_XYZW;
- declaration.Interpolate = 0;
+ declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
declaration.Semantic = 0;
declaration.Padding = 0;
declaration.Extended = 0;
@@ -103,7 +102,6 @@ tgsi_default_declaration( void )
struct tgsi_declaration
tgsi_build_declaration(
unsigned file,
- unsigned declare,
unsigned usage_mask,
unsigned interpolate,
unsigned semantic,
@@ -112,11 +110,10 @@ tgsi_build_declaration(
struct tgsi_declaration declaration;
assert( file <= TGSI_FILE_IMMEDIATE );
- assert( declare <= TGSI_DECLARE_MASK );
+ assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
declaration = tgsi_default_declaration();
declaration.File = file;
- declaration.Declare = declare;
declaration.UsageMask = usage_mask;
declaration.Interpolate = interpolate;
declaration.Semantic = semantic;
@@ -144,7 +141,7 @@ tgsi_default_full_declaration( void )
struct tgsi_full_declaration full_declaration;
full_declaration.Declaration = tgsi_default_declaration();
- full_declaration.Interpolation = tgsi_default_declaration_interpolation();
+ full_declaration.DeclarationRange = tgsi_default_declaration_range();
full_declaration.Semantic = tgsi_default_declaration_semantic();
return full_declaration;
@@ -159,6 +156,7 @@ tgsi_build_full_declaration(
{
unsigned size = 0;
struct tgsi_declaration *declaration;
+ struct tgsi_declaration_range *dr;
if( maxsize <= size )
return 0;
@@ -167,63 +165,21 @@ tgsi_build_full_declaration(
*declaration = tgsi_build_declaration(
full_decl->Declaration.File,
- full_decl->Declaration.Declare,
full_decl->Declaration.UsageMask,
full_decl->Declaration.Interpolate,
full_decl->Declaration.Semantic,
header );
- switch( full_decl->Declaration.Declare ) {
- case TGSI_DECLARE_RANGE:
- {
- struct tgsi_declaration_range *dr;
-
- if( maxsize <= size )
- return 0;
- dr = (struct tgsi_declaration_range *) &tokens[size];
- size++;
-
- *dr = tgsi_build_declaration_range(
- full_decl->u.DeclarationRange.First,
- full_decl->u.DeclarationRange.Last,
- declaration,
- header );
- break;
- }
-
- case TGSI_DECLARE_MASK:
- {
- struct tgsi_declaration_mask *dm;
-
- if( maxsize <= size )
- return 0;
- dm = (struct tgsi_declaration_mask *) &tokens[size];
- size++;
-
- *dm = tgsi_build_declaration_mask(
- full_decl->u.DeclarationMask.Mask,
- declaration,
- header );
- break;
- }
-
- default:
- assert( 0 );
- }
-
- if( full_decl->Declaration.Interpolate ) {
- struct tgsi_declaration_interpolation *di;
-
- if( maxsize <= size )
- return 0;
- di = (struct tgsi_declaration_interpolation *) &tokens[size];
- size++;
+ if (maxsize <= size)
+ return 0;
+ dr = (struct tgsi_declaration_range *) &tokens[size];
+ size++;
- *di = tgsi_build_declaration_interpolation(
- full_decl->Interpolation.Interpolate,
- declaration,
- header );
- }
+ *dr = tgsi_build_declaration_range(
+ full_decl->DeclarationRange.First,
+ full_decl->DeclarationRange.Last,
+ declaration,
+ header );
if( full_decl->Declaration.Semantic ) {
struct tgsi_declaration_semantic *ds;
@@ -244,6 +200,17 @@ tgsi_build_full_declaration(
}
struct tgsi_declaration_range
+tgsi_default_declaration_range( void )
+{
+ struct tgsi_declaration_range dr;
+
+ dr.First = 0;
+ dr.Last = 0;
+
+ return dr;
+}
+
+struct tgsi_declaration_range
tgsi_build_declaration_range(
unsigned first,
unsigned last,
@@ -255,6 +222,7 @@ tgsi_build_declaration_range(
assert( last >= first );
assert( last <= 0xFFFF );
+ declaration_range = tgsi_default_declaration_range();
declaration_range.First = first;
declaration_range.Last = last;
@@ -263,50 +231,6 @@ tgsi_build_declaration_range(
return declaration_range;
}
-struct tgsi_declaration_mask
-tgsi_build_declaration_mask(
- unsigned mask,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header )
-{
- struct tgsi_declaration_mask declaration_mask;
-
- declaration_mask.Mask = mask;
-
- declaration_grow( declaration, header );
-
- return declaration_mask;
-}
-
-struct tgsi_declaration_interpolation
-tgsi_default_declaration_interpolation( void )
-{
- struct tgsi_declaration_interpolation di;
-
- di.Interpolate = TGSI_INTERPOLATE_CONSTANT;
- di.Padding = 0;
-
- return di;
-}
-
-struct tgsi_declaration_interpolation
-tgsi_build_declaration_interpolation(
- unsigned interpolate,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header )
-{
- struct tgsi_declaration_interpolation di;
-
- assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
-
- di = tgsi_default_declaration_interpolation();
- di.Interpolate = interpolate;
-
- declaration_grow( declaration, header );
-
- return di;
-}
-
struct tgsi_declaration_semantic
tgsi_default_declaration_semantic( void )
{
@@ -704,6 +628,14 @@ tgsi_build_full_instruction(
tgsi_default_src_register_ext_swz() ) ) {
struct tgsi_src_register_ext_swz *src_register_ext_swz;
+ /* Use of the extended swizzle requires the simple swizzle to be identity.
+ */
+ assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X );
+ assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y );
+ assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z );
+ assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W );
+ assert( reg->SrcRegister.Negate == FALSE );
+
if( maxsize <= size )
return 0;
src_register_ext_swz =
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
index 80bffc4ae7..423cf141f5 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
@@ -37,7 +37,6 @@ tgsi_default_declaration( void );
struct tgsi_declaration
tgsi_build_declaration(
unsigned file,
- unsigned declare,
unsigned usage_mask,
unsigned interpolate,
unsigned semantic,
@@ -54,27 +53,15 @@ tgsi_build_full_declaration(
unsigned maxsize );
struct tgsi_declaration_range
+tgsi_default_declaration_range( void );
+
+struct tgsi_declaration_range
tgsi_build_declaration_range(
unsigned first,
unsigned last,
struct tgsi_declaration *declaration,
struct tgsi_header *header );
-struct tgsi_declaration_mask
-tgsi_build_declaration_mask(
- unsigned mask,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header );
-
-struct tgsi_declaration_interpolation
-tgsi_default_declaration_interpolation( void );
-
-struct tgsi_declaration_interpolation
-tgsi_build_declaration_interpolation(
- unsigned interpolate,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header );
-
struct tgsi_declaration_semantic
tgsi_default_declaration_semantic( void );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 4c65ffd780..92aff88925 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -539,26 +539,20 @@ static const char *TGSI_MODULATES[] =
"MODULATE_EIGHTH"
};
-static void
-dump_declaration_short(
- struct tgsi_full_declaration *decl )
+void
+tgsi_dump_declaration(
+ const struct tgsi_full_declaration *decl )
{
TXT( "\nDCL " );
ENM( decl->Declaration.File, TGSI_FILES_SHORT );
- switch( decl->Declaration.Declare ) {
- case TGSI_DECLARE_RANGE:
- CHR( '[' );
- UID( decl->u.DeclarationRange.First );
- if( decl->u.DeclarationRange.First != decl->u.DeclarationRange.Last ) {
- TXT( ".." );
- UID( decl->u.DeclarationRange.Last );
- }
- CHR( ']' );
- break;
- default:
- assert( 0 );
+ CHR( '[' );
+ UID( decl->DeclarationRange.First );
+ if (decl->DeclarationRange.First != decl->DeclarationRange.Last) {
+ TXT( ".." );
+ UID( decl->DeclarationRange.Last );
}
+ CHR( ']' );
if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) {
CHR( '.' );
@@ -586,10 +580,8 @@ dump_declaration_short(
}
}
- if (decl->Declaration.Interpolate) {
- TXT( ", " );
- ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT );
- }
+ TXT( ", " );
+ ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES_SHORT );
}
static void
@@ -601,8 +593,6 @@ dump_declaration_verbose(
{
TXT( "\nFile : " );
ENM( decl->Declaration.File, TGSI_FILES );
- TXT( "\nDeclare : " );
- ENM( decl->Declaration.Declare, TGSI_DECLARES );
if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) {
TXT( "\nUsageMask : " );
if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
@@ -620,7 +610,7 @@ dump_declaration_verbose(
}
if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) {
TXT( "\nInterpolate: " );
- UID( decl->Declaration.Interpolate );
+ ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES );
}
if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) {
TXT( "\nSemantic : " );
@@ -632,32 +622,10 @@ dump_declaration_verbose(
}
EOL();
- switch( decl->Declaration.Declare ) {
- case TGSI_DECLARE_RANGE:
- TXT( "\nFirst: " );
- UID( decl->u.DeclarationRange.First );
- TXT( "\nLast : " );
- UID( decl->u.DeclarationRange.Last );
- break;
-
- case TGSI_DECLARE_MASK:
- TXT( "\nMask: " );
- UIX( decl->u.DeclarationMask.Mask );
- break;
-
- default:
- assert( 0 );
- }
-
- if( decl->Declaration.Interpolate ) {
- EOL();
- TXT( "\nInterpolate: " );
- ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES );
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( decl->Interpolation.Padding );
- }
- }
+ TXT( "\nFirst: " );
+ UID( decl->DeclarationRange.First );
+ TXT( "\nLast : " );
+ UID( decl->DeclarationRange.Last );
if( decl->Declaration.Semantic ) {
EOL();
@@ -672,9 +640,9 @@ dump_declaration_verbose(
}
}
-static void
-dump_immediate_short(
- struct tgsi_full_immediate *imm )
+void
+tgsi_dump_immediate(
+ const struct tgsi_full_immediate *imm )
{
unsigned i;
@@ -727,9 +695,9 @@ dump_immediate_verbose(
}
}
-static void
-dump_instruction_short(
- struct tgsi_full_instruction *inst,
+void
+tgsi_dump_instruction(
+ const struct tgsi_full_instruction *inst,
unsigned instno )
{
unsigned i;
@@ -754,7 +722,7 @@ dump_instruction_short(
}
for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
- struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+ const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
if( !first_reg ) {
CHR( ',' );
@@ -812,7 +780,7 @@ dump_instruction_short(
}
for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
- struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+ const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
if( !first_reg ) {
CHR( ',' );
@@ -835,7 +803,14 @@ dump_instruction_short(
ENM( src->SrcRegister.File, TGSI_FILES_SHORT );
CHR( '[' );
- SID( src->SrcRegister.Index );
+ if (src->SrcRegister.Indirect) {
+ TXT( "addr" );
+ if (src->SrcRegister.Index > 0)
+ CHR( '+' );
+ SID( src->SrcRegister.Index );
+ }
+ else
+ SID( src->SrcRegister.Index );
CHR( ']' );
if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
@@ -1281,17 +1256,17 @@ tgsi_dump(
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
- dump_declaration_short(
+ tgsi_dump_declaration(
&parse.FullToken.FullDeclaration );
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- dump_immediate_short(
+ tgsi_dump_immediate(
&parse.FullToken.FullImmediate );
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- dump_instruction_short(
+ tgsi_dump_instruction(
&parse.FullToken.FullInstruction,
instno );
instno++;
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index beb0155d56..ca83bdef20 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -14,6 +14,24 @@ tgsi_dump(
const struct tgsi_token *tokens,
unsigned flags );
+struct tgsi_full_immediate;
+struct tgsi_full_instruction;
+struct tgsi_full_declaration;
+
+void
+tgsi_dump_immediate(
+ const struct tgsi_full_immediate *imm );
+
+void
+tgsi_dump_instruction(
+ const struct tgsi_full_instruction *inst,
+ unsigned instno );
+
+void
+tgsi_dump_declaration(
+ const struct tgsi_full_declaration *decl );
+
+
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
index 5c0b0bfd61..d16f0cdcad 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
@@ -118,22 +118,7 @@ tgsi_parse_token(
*decl = tgsi_default_full_declaration();
decl->Declaration = *(struct tgsi_declaration *) &token;
- switch( decl->Declaration.Type ) {
- case TGSI_DECLARE_RANGE:
- next_token( ctx, &decl->u.DeclarationRange );
- break;
-
- case TGSI_DECLARE_MASK:
- next_token( ctx, &decl->u.DeclarationMask );
- break;
-
- default:
- assert (0);
- }
-
- if( decl->Declaration.Interpolate ) {
- next_token( ctx, &decl->Interpolation );
- }
+ next_token( ctx, &decl->DeclarationRange );
if( decl->Declaration.Semantic ) {
next_token( ctx, &decl->Semantic );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index 4102101093..054350712d 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -65,13 +65,8 @@ struct tgsi_full_src_register
struct tgsi_full_declaration
{
struct tgsi_declaration Declaration;
- union
- {
- struct tgsi_declaration_range DeclarationRange;
- struct tgsi_declaration_mask DeclarationMask;
- } u;
- struct tgsi_declaration_interpolation Interpolation;
- struct tgsi_declaration_semantic Semantic;
+ struct tgsi_declaration_range DeclarationRange;
+ struct tgsi_declaration_semantic Semantic;
};
struct tgsi_full_immediate
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index 65650ed22a..240aaaf362 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -93,8 +93,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
= &parse.FullToken.FullDeclaration;
uint file = fulldecl->Declaration.File;
uint i;
- for (i = fulldecl->u.DeclarationRange.First;
- i <= fulldecl->u.DeclarationRange.Last;
+ for (i = fulldecl->DeclarationRange.First;
+ i <= fulldecl->DeclarationRange.Last;
i++) {
/* only first 32 regs will appear in this bitfield */
@@ -141,3 +141,86 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
tgsi_parse_free (&parse);
}
+
+
+
+/**
+ * Check if the given shader is a "passthrough" shader consisting of only
+ * MOV instructions of the form: MOV OUT[n], IN[n]
+ *
+ */
+boolean
+tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
+{
+ struct tgsi_parse_context parse;
+
+ /**
+ ** Setup to begin parsing input shader
+ **/
+ if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
+ debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n");
+ return FALSE;
+ }
+
+ /**
+ ** Loop over incoming program tokens/instructions
+ */
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ struct tgsi_full_instruction *fullinst =
+ &parse.FullToken.FullInstruction;
+ const struct tgsi_full_src_register *src =
+ &fullinst->FullSrcRegisters[0];
+ const struct tgsi_full_dst_register *dst =
+ &fullinst->FullDstRegisters[0];
+
+ /* Do a whole bunch of checks for a simple move */
+ if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
+ src->SrcRegister.File != TGSI_FILE_INPUT ||
+ dst->DstRegister.File != TGSI_FILE_OUTPUT ||
+ src->SrcRegister.Index != dst->DstRegister.Index ||
+
+ src->SrcRegister.Negate ||
+ src->SrcRegisterExtMod.Negate ||
+ src->SrcRegisterExtMod.Absolute ||
+ src->SrcRegisterExtMod.Scale2X ||
+ src->SrcRegisterExtMod.Bias ||
+ src->SrcRegisterExtMod.Complement ||
+
+ src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
+ src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
+ src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
+ src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ||
+
+ src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
+ src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
+ src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
+ src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W ||
+
+ dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW)
+ {
+ tgsi_parse_free(&parse);
+ return FALSE;
+ }
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ /* fall-through */
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* fall-through */
+ default:
+ ; /* no-op */
+ }
+ }
+
+ tgsi_parse_free(&parse);
+
+ /* if we get here, it's a pass-through shader */
+ return TRUE;
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
index 0530bc6b51..5cb6efb343 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
@@ -67,4 +67,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
struct tgsi_shader_info *info);
+extern boolean
+tgsi_is_passthrough_shader(const struct tgsi_token *tokens);
+
+
#endif /* TGSI_SCAN_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
index 4cdd89182a..56a50d3b21 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
@@ -8,7 +8,7 @@
union pointer_hack
{
void *pointer;
- unsigned long long uint64;
+ uint64_t uint64;
};
void *
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index b04bc6eefd..b93fbf9033 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -30,6 +30,7 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
+#include "pipe/p_config.h"
#include "pipe/p_util.h"
#include "pipe/p_state.h"
#include "translate.h"
@@ -38,7 +39,7 @@ struct translate *translate_create( const struct translate_key *key )
{
struct translate *translate = NULL;
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#if defined(PIPE_ARCH_X86)
translate = translate_sse2_create( key );
if (translate)
return translate;
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index b8210af50c..c3b754a902 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -71,15 +71,15 @@ struct translate {
const void *ptr,
unsigned stride );
- void (*run_elts)( struct translate *,
- const unsigned *elts,
- unsigned count,
- void *output_buffer);
-
- void (*run)( struct translate *,
- unsigned start,
- unsigned count,
- void *output_buffer);
+ void (PIPE_CDECL *run_elts)( struct translate *,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer);
+
+ void (PIPE_CDECL *run)( struct translate *,
+ unsigned start,
+ unsigned count,
+ void *output_buffer);
};
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 402780ee53..3fec89b36e 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -255,140 +255,140 @@ static fetch_func get_fetch_func( enum pipe_format format )
{
switch (format) {
case PIPE_FORMAT_R64_FLOAT:
- return fetch_R64_FLOAT;
+ return &fetch_R64_FLOAT;
case PIPE_FORMAT_R64G64_FLOAT:
- return fetch_R64G64_FLOAT;
+ return &fetch_R64G64_FLOAT;
case PIPE_FORMAT_R64G64B64_FLOAT:
- return fetch_R64G64B64_FLOAT;
+ return &fetch_R64G64B64_FLOAT;
case PIPE_FORMAT_R64G64B64A64_FLOAT:
- return fetch_R64G64B64A64_FLOAT;
+ return &fetch_R64G64B64A64_FLOAT;
case PIPE_FORMAT_R32_FLOAT:
- return fetch_R32_FLOAT;
+ return &fetch_R32_FLOAT;
case PIPE_FORMAT_R32G32_FLOAT:
- return fetch_R32G32_FLOAT;
+ return &fetch_R32G32_FLOAT;
case PIPE_FORMAT_R32G32B32_FLOAT:
- return fetch_R32G32B32_FLOAT;
+ return &fetch_R32G32B32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
- return fetch_R32G32B32A32_FLOAT;
+ return &fetch_R32G32B32A32_FLOAT;
case PIPE_FORMAT_R32_UNORM:
- return fetch_R32_UNORM;
+ return &fetch_R32_UNORM;
case PIPE_FORMAT_R32G32_UNORM:
- return fetch_R32G32_UNORM;
+ return &fetch_R32G32_UNORM;
case PIPE_FORMAT_R32G32B32_UNORM:
- return fetch_R32G32B32_UNORM;
+ return &fetch_R32G32B32_UNORM;
case PIPE_FORMAT_R32G32B32A32_UNORM:
- return fetch_R32G32B32A32_UNORM;
+ return &fetch_R32G32B32A32_UNORM;
case PIPE_FORMAT_R32_USCALED:
- return fetch_R32_USCALED;
+ return &fetch_R32_USCALED;
case PIPE_FORMAT_R32G32_USCALED:
- return fetch_R32G32_USCALED;
+ return &fetch_R32G32_USCALED;
case PIPE_FORMAT_R32G32B32_USCALED:
- return fetch_R32G32B32_USCALED;
+ return &fetch_R32G32B32_USCALED;
case PIPE_FORMAT_R32G32B32A32_USCALED:
- return fetch_R32G32B32A32_USCALED;
+ return &fetch_R32G32B32A32_USCALED;
case PIPE_FORMAT_R32_SNORM:
- return fetch_R32_SNORM;
+ return &fetch_R32_SNORM;
case PIPE_FORMAT_R32G32_SNORM:
- return fetch_R32G32_SNORM;
+ return &fetch_R32G32_SNORM;
case PIPE_FORMAT_R32G32B32_SNORM:
- return fetch_R32G32B32_SNORM;
+ return &fetch_R32G32B32_SNORM;
case PIPE_FORMAT_R32G32B32A32_SNORM:
- return fetch_R32G32B32A32_SNORM;
+ return &fetch_R32G32B32A32_SNORM;
case PIPE_FORMAT_R32_SSCALED:
- return fetch_R32_SSCALED;
+ return &fetch_R32_SSCALED;
case PIPE_FORMAT_R32G32_SSCALED:
- return fetch_R32G32_SSCALED;
+ return &fetch_R32G32_SSCALED;
case PIPE_FORMAT_R32G32B32_SSCALED:
- return fetch_R32G32B32_SSCALED;
+ return &fetch_R32G32B32_SSCALED;
case PIPE_FORMAT_R32G32B32A32_SSCALED:
- return fetch_R32G32B32A32_SSCALED;
+ return &fetch_R32G32B32A32_SSCALED;
case PIPE_FORMAT_R16_UNORM:
- return fetch_R16_UNORM;
+ return &fetch_R16_UNORM;
case PIPE_FORMAT_R16G16_UNORM:
- return fetch_R16G16_UNORM;
+ return &fetch_R16G16_UNORM;
case PIPE_FORMAT_R16G16B16_UNORM:
- return fetch_R16G16B16_UNORM;
+ return &fetch_R16G16B16_UNORM;
case PIPE_FORMAT_R16G16B16A16_UNORM:
- return fetch_R16G16B16A16_UNORM;
+ return &fetch_R16G16B16A16_UNORM;
case PIPE_FORMAT_R16_USCALED:
- return fetch_R16_USCALED;
+ return &fetch_R16_USCALED;
case PIPE_FORMAT_R16G16_USCALED:
- return fetch_R16G16_USCALED;
+ return &fetch_R16G16_USCALED;
case PIPE_FORMAT_R16G16B16_USCALED:
- return fetch_R16G16B16_USCALED;
+ return &fetch_R16G16B16_USCALED;
case PIPE_FORMAT_R16G16B16A16_USCALED:
- return fetch_R16G16B16A16_USCALED;
+ return &fetch_R16G16B16A16_USCALED;
case PIPE_FORMAT_R16_SNORM:
- return fetch_R16_SNORM;
+ return &fetch_R16_SNORM;
case PIPE_FORMAT_R16G16_SNORM:
- return fetch_R16G16_SNORM;
+ return &fetch_R16G16_SNORM;
case PIPE_FORMAT_R16G16B16_SNORM:
- return fetch_R16G16B16_SNORM;
+ return &fetch_R16G16B16_SNORM;
case PIPE_FORMAT_R16G16B16A16_SNORM:
- return fetch_R16G16B16A16_SNORM;
+ return &fetch_R16G16B16A16_SNORM;
case PIPE_FORMAT_R16_SSCALED:
- return fetch_R16_SSCALED;
+ return &fetch_R16_SSCALED;
case PIPE_FORMAT_R16G16_SSCALED:
- return fetch_R16G16_SSCALED;
+ return &fetch_R16G16_SSCALED;
case PIPE_FORMAT_R16G16B16_SSCALED:
- return fetch_R16G16B16_SSCALED;
+ return &fetch_R16G16B16_SSCALED;
case PIPE_FORMAT_R16G16B16A16_SSCALED:
- return fetch_R16G16B16A16_SSCALED;
+ return &fetch_R16G16B16A16_SSCALED;
case PIPE_FORMAT_R8_UNORM:
- return fetch_R8_UNORM;
+ return &fetch_R8_UNORM;
case PIPE_FORMAT_R8G8_UNORM:
- return fetch_R8G8_UNORM;
+ return &fetch_R8G8_UNORM;
case PIPE_FORMAT_R8G8B8_UNORM:
- return fetch_R8G8B8_UNORM;
+ return &fetch_R8G8B8_UNORM;
case PIPE_FORMAT_R8G8B8A8_UNORM:
- return fetch_R8G8B8A8_UNORM;
+ return &fetch_R8G8B8A8_UNORM;
case PIPE_FORMAT_R8_USCALED:
- return fetch_R8_USCALED;
+ return &fetch_R8_USCALED;
case PIPE_FORMAT_R8G8_USCALED:
- return fetch_R8G8_USCALED;
+ return &fetch_R8G8_USCALED;
case PIPE_FORMAT_R8G8B8_USCALED:
- return fetch_R8G8B8_USCALED;
+ return &fetch_R8G8B8_USCALED;
case PIPE_FORMAT_R8G8B8A8_USCALED:
- return fetch_R8G8B8A8_USCALED;
+ return &fetch_R8G8B8A8_USCALED;
case PIPE_FORMAT_R8_SNORM:
- return fetch_R8_SNORM;
+ return &fetch_R8_SNORM;
case PIPE_FORMAT_R8G8_SNORM:
- return fetch_R8G8_SNORM;
+ return &fetch_R8G8_SNORM;
case PIPE_FORMAT_R8G8B8_SNORM:
- return fetch_R8G8B8_SNORM;
+ return &fetch_R8G8B8_SNORM;
case PIPE_FORMAT_R8G8B8A8_SNORM:
- return fetch_R8G8B8A8_SNORM;
+ return &fetch_R8G8B8A8_SNORM;
case PIPE_FORMAT_R8_SSCALED:
- return fetch_R8_SSCALED;
+ return &fetch_R8_SSCALED;
case PIPE_FORMAT_R8G8_SSCALED:
- return fetch_R8G8_SSCALED;
+ return &fetch_R8G8_SSCALED;
case PIPE_FORMAT_R8G8B8_SSCALED:
- return fetch_R8G8B8_SSCALED;
+ return &fetch_R8G8B8_SSCALED;
case PIPE_FORMAT_R8G8B8A8_SSCALED:
- return fetch_R8G8B8A8_SSCALED;
+ return &fetch_R8G8B8A8_SSCALED;
case PIPE_FORMAT_A8R8G8B8_UNORM:
- return fetch_A8R8G8B8_UNORM;
+ return &fetch_A8R8G8B8_UNORM;
case PIPE_FORMAT_B8G8R8A8_UNORM:
- return fetch_B8G8R8A8_UNORM;
+ return &fetch_B8G8R8A8_UNORM;
default:
assert(0);
- return fetch_NULL;
+ return &fetch_NULL;
}
}
@@ -399,140 +399,140 @@ static emit_func get_emit_func( enum pipe_format format )
{
switch (format) {
case PIPE_FORMAT_R64_FLOAT:
- return emit_R64_FLOAT;
+ return &emit_R64_FLOAT;
case PIPE_FORMAT_R64G64_FLOAT:
- return emit_R64G64_FLOAT;
+ return &emit_R64G64_FLOAT;
case PIPE_FORMAT_R64G64B64_FLOAT:
- return emit_R64G64B64_FLOAT;
+ return &emit_R64G64B64_FLOAT;
case PIPE_FORMAT_R64G64B64A64_FLOAT:
- return emit_R64G64B64A64_FLOAT;
+ return &emit_R64G64B64A64_FLOAT;
case PIPE_FORMAT_R32_FLOAT:
- return emit_R32_FLOAT;
+ return &emit_R32_FLOAT;
case PIPE_FORMAT_R32G32_FLOAT:
- return emit_R32G32_FLOAT;
+ return &emit_R32G32_FLOAT;
case PIPE_FORMAT_R32G32B32_FLOAT:
- return emit_R32G32B32_FLOAT;
+ return &emit_R32G32B32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
- return emit_R32G32B32A32_FLOAT;
+ return &emit_R32G32B32A32_FLOAT;
case PIPE_FORMAT_R32_UNORM:
- return emit_R32_UNORM;
+ return &emit_R32_UNORM;
case PIPE_FORMAT_R32G32_UNORM:
- return emit_R32G32_UNORM;
+ return &emit_R32G32_UNORM;
case PIPE_FORMAT_R32G32B32_UNORM:
- return emit_R32G32B32_UNORM;
+ return &emit_R32G32B32_UNORM;
case PIPE_FORMAT_R32G32B32A32_UNORM:
- return emit_R32G32B32A32_UNORM;
+ return &emit_R32G32B32A32_UNORM;
case PIPE_FORMAT_R32_USCALED:
- return emit_R32_USCALED;
+ return &emit_R32_USCALED;
case PIPE_FORMAT_R32G32_USCALED:
- return emit_R32G32_USCALED;
+ return &emit_R32G32_USCALED;
case PIPE_FORMAT_R32G32B32_USCALED:
- return emit_R32G32B32_USCALED;
+ return &emit_R32G32B32_USCALED;
case PIPE_FORMAT_R32G32B32A32_USCALED:
- return emit_R32G32B32A32_USCALED;
+ return &emit_R32G32B32A32_USCALED;
case PIPE_FORMAT_R32_SNORM:
- return emit_R32_SNORM;
+ return &emit_R32_SNORM;
case PIPE_FORMAT_R32G32_SNORM:
- return emit_R32G32_SNORM;
+ return &emit_R32G32_SNORM;
case PIPE_FORMAT_R32G32B32_SNORM:
- return emit_R32G32B32_SNORM;
+ return &emit_R32G32B32_SNORM;
case PIPE_FORMAT_R32G32B32A32_SNORM:
- return emit_R32G32B32A32_SNORM;
+ return &emit_R32G32B32A32_SNORM;
case PIPE_FORMAT_R32_SSCALED:
- return emit_R32_SSCALED;
+ return &emit_R32_SSCALED;
case PIPE_FORMAT_R32G32_SSCALED:
- return emit_R32G32_SSCALED;
+ return &emit_R32G32_SSCALED;
case PIPE_FORMAT_R32G32B32_SSCALED:
- return emit_R32G32B32_SSCALED;
+ return &emit_R32G32B32_SSCALED;
case PIPE_FORMAT_R32G32B32A32_SSCALED:
- return emit_R32G32B32A32_SSCALED;
+ return &emit_R32G32B32A32_SSCALED;
case PIPE_FORMAT_R16_UNORM:
- return emit_R16_UNORM;
+ return &emit_R16_UNORM;
case PIPE_FORMAT_R16G16_UNORM:
- return emit_R16G16_UNORM;
+ return &emit_R16G16_UNORM;
case PIPE_FORMAT_R16G16B16_UNORM:
- return emit_R16G16B16_UNORM;
+ return &emit_R16G16B16_UNORM;
case PIPE_FORMAT_R16G16B16A16_UNORM:
- return emit_R16G16B16A16_UNORM;
+ return &emit_R16G16B16A16_UNORM;
case PIPE_FORMAT_R16_USCALED:
- return emit_R16_USCALED;
+ return &emit_R16_USCALED;
case PIPE_FORMAT_R16G16_USCALED:
- return emit_R16G16_USCALED;
+ return &emit_R16G16_USCALED;
case PIPE_FORMAT_R16G16B16_USCALED:
- return emit_R16G16B16_USCALED;
+ return &emit_R16G16B16_USCALED;
case PIPE_FORMAT_R16G16B16A16_USCALED:
- return emit_R16G16B16A16_USCALED;
+ return &emit_R16G16B16A16_USCALED;
case PIPE_FORMAT_R16_SNORM:
- return emit_R16_SNORM;
+ return &emit_R16_SNORM;
case PIPE_FORMAT_R16G16_SNORM:
- return emit_R16G16_SNORM;
+ return &emit_R16G16_SNORM;
case PIPE_FORMAT_R16G16B16_SNORM:
- return emit_R16G16B16_SNORM;
+ return &emit_R16G16B16_SNORM;
case PIPE_FORMAT_R16G16B16A16_SNORM:
- return emit_R16G16B16A16_SNORM;
+ return &emit_R16G16B16A16_SNORM;
case PIPE_FORMAT_R16_SSCALED:
- return emit_R16_SSCALED;
+ return &emit_R16_SSCALED;
case PIPE_FORMAT_R16G16_SSCALED:
- return emit_R16G16_SSCALED;
+ return &emit_R16G16_SSCALED;
case PIPE_FORMAT_R16G16B16_SSCALED:
- return emit_R16G16B16_SSCALED;
+ return &emit_R16G16B16_SSCALED;
case PIPE_FORMAT_R16G16B16A16_SSCALED:
- return emit_R16G16B16A16_SSCALED;
+ return &emit_R16G16B16A16_SSCALED;
case PIPE_FORMAT_R8_UNORM:
- return emit_R8_UNORM;
+ return &emit_R8_UNORM;
case PIPE_FORMAT_R8G8_UNORM:
- return emit_R8G8_UNORM;
+ return &emit_R8G8_UNORM;
case PIPE_FORMAT_R8G8B8_UNORM:
- return emit_R8G8B8_UNORM;
+ return &emit_R8G8B8_UNORM;
case PIPE_FORMAT_R8G8B8A8_UNORM:
- return emit_R8G8B8A8_UNORM;
+ return &emit_R8G8B8A8_UNORM;
case PIPE_FORMAT_R8_USCALED:
- return emit_R8_USCALED;
+ return &emit_R8_USCALED;
case PIPE_FORMAT_R8G8_USCALED:
- return emit_R8G8_USCALED;
+ return &emit_R8G8_USCALED;
case PIPE_FORMAT_R8G8B8_USCALED:
- return emit_R8G8B8_USCALED;
+ return &emit_R8G8B8_USCALED;
case PIPE_FORMAT_R8G8B8A8_USCALED:
- return emit_R8G8B8A8_USCALED;
+ return &emit_R8G8B8A8_USCALED;
case PIPE_FORMAT_R8_SNORM:
- return emit_R8_SNORM;
+ return &emit_R8_SNORM;
case PIPE_FORMAT_R8G8_SNORM:
- return emit_R8G8_SNORM;
+ return &emit_R8G8_SNORM;
case PIPE_FORMAT_R8G8B8_SNORM:
- return emit_R8G8B8_SNORM;
+ return &emit_R8G8B8_SNORM;
case PIPE_FORMAT_R8G8B8A8_SNORM:
- return emit_R8G8B8A8_SNORM;
+ return &emit_R8G8B8A8_SNORM;
case PIPE_FORMAT_R8_SSCALED:
- return emit_R8_SSCALED;
+ return &emit_R8_SSCALED;
case PIPE_FORMAT_R8G8_SSCALED:
- return emit_R8G8_SSCALED;
+ return &emit_R8G8_SSCALED;
case PIPE_FORMAT_R8G8B8_SSCALED:
- return emit_R8G8B8_SSCALED;
+ return &emit_R8G8B8_SSCALED;
case PIPE_FORMAT_R8G8B8A8_SSCALED:
- return emit_R8G8B8A8_SSCALED;
+ return &emit_R8G8B8A8_SSCALED;
case PIPE_FORMAT_A8R8G8B8_UNORM:
- return emit_A8R8G8B8_UNORM;
+ return &emit_A8R8G8B8_UNORM;
case PIPE_FORMAT_B8G8R8A8_UNORM:
- return emit_B8G8R8A8_UNORM;
+ return &emit_B8G8R8A8_UNORM;
default:
assert(0);
- return emit_NULL;
+ return &emit_NULL;
}
}
@@ -541,10 +541,10 @@ static emit_func get_emit_func( enum pipe_format format )
/**
* Fetch vertex attributes for 'count' vertices.
*/
-static void generic_run_elts( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- void *output_buffer )
+static void PIPE_CDECL generic_run_elts( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
@@ -580,10 +580,10 @@ static void generic_run_elts( struct translate *translate,
-static void generic_run( struct translate *translate,
- unsigned start,
- unsigned count,
- void *output_buffer )
+static void PIPE_CDECL generic_run( struct translate *translate,
+ unsigned start,
+ unsigned count,
+ void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index a54ac5a82f..18a212ac1c 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -26,6 +26,7 @@
*/
+#include "pipe/p_config.h"
#include "pipe/p_compiler.h"
#include "pipe/p_util.h"
#include "util/u_simple_list.h"
@@ -33,7 +34,7 @@
#include "translate.h"
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#if defined(PIPE_ARCH_X86)
#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"
@@ -45,22 +46,16 @@
#define W 3
-#ifdef WIN32
-#define RTASM __cdecl
-#else
-#define RTASM
-#endif
-
-typedef void (RTASM *run_func)( struct translate *translate,
- unsigned start,
- unsigned count,
- void *output_buffer );
-
-typedef void (RTASM *run_elts_func)( struct translate *translate,
- const unsigned *elts,
+typedef void (PIPE_CDECL *run_func)( struct translate *translate,
+ unsigned start,
unsigned count,
void *output_buffer );
+typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer );
+
struct translate_sse {
@@ -315,7 +310,7 @@ static void get_src_ptr( struct translate_sse *p,
static void emit_swizzle( struct translate_sse *p,
struct x86_reg dest,
struct x86_reg src,
- unsigned shuffle )
+ unsigned char shuffle )
{
sse_shufps(p->func, dest, src, shuffle);
}
@@ -472,13 +467,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride));
/* Incr index
- */ /* Emit code for each of the attributes. Currently routes
- * everything through SSE registers, even when it might be more
- * efficient to stick with regular old x86. No optimization or
- * other tricks - enough new ground to cover here just getting
- * things working.
- */
-
+ */
if (linear) {
x86_inc(p->func, idxEBX);
}
@@ -546,7 +535,7 @@ static void translate_sse_release( struct translate *translate )
FREE(p);
}
-static void translate_sse_run_elts( struct translate *translate,
+static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
void *output_buffer )
@@ -559,7 +548,7 @@ static void translate_sse_run_elts( struct translate *translate,
output_buffer );
}
-static void translate_sse_run( struct translate *translate,
+static void PIPE_CDECL translate_sse_run( struct translate *translate,
unsigned start,
unsigned count,
void *output_buffer )
@@ -617,7 +606,7 @@ struct translate *translate_sse2_create( const struct translate_key *key )
#else
-void translate_create_sse( const struct translate_key *key )
+struct translate *translate_sse2_create( const struct translate_key *key )
{
return NULL;
}
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
index 05bc43131a..db3d810a2e 100644
--- a/src/gallium/auxiliary/util/Makefile
+++ b/src/gallium/auxiliary/util/Makefile
@@ -15,8 +15,7 @@ C_SOURCES = \
u_mm.c \
u_simple_shaders.c \
u_snprintf.c \
- u_time.c \
- u_mm.c
+ u_time.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c
index ce7fb58956..9d56f8bfab 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/p_debug.c
@@ -153,7 +153,9 @@ const char *
debug_get_option(const char *name, const char *dfault)
{
const char *result;
-#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
+#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
+ /* EngMapFile creates the file if it does not exists, so it must either be
+ * disabled on release versions (or put in a less conspicuous place). */
#ifdef DEBUG
ULONG_PTR iFile = 0;
const void *pMap = NULL;
@@ -161,9 +163,6 @@ debug_get_option(const char *name, const char *dfault)
static char output[1024];
result = dfault;
- /* XXX: this creates the file if it does not exists, so it must either be
- * disabled on release versions, or put in a less conspicuous place.
- */
pMap = EngMapFile(L"\\??\\c:\\gallium.cfg", 0, &iFile);
if(pMap) {
sol = (const char *)pMap;
@@ -187,13 +186,15 @@ debug_get_option(const char *name, const char *dfault)
#else
result = dfault;
#endif
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+ /* TODO: implement */
+ result = dfault;
#else
-
result = getenv(name);
if(!result)
result = dfault;
#endif
-
+
debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)");
return result;
@@ -229,8 +230,34 @@ debug_get_bool_option(const char *name, boolean dfault)
long
debug_get_num_option(const char *name, long dfault)
{
- /* FIXME */
- return dfault;
+ long result;
+ const char *str;
+
+ str = debug_get_option(name, NULL);
+ if(!str)
+ result = dfault;
+ else {
+ long sign;
+ char c;
+ c = *str++;
+ if(c == '-') {
+ sign = -1;
+ c = *str++;
+ }
+ else {
+ sign = 1;
+ }
+ result = 0;
+ while('0' <= c && c <= '9') {
+ result = result*10 + (c - '0');
+ c = *str++;
+ }
+ result *= sign;
+ }
+
+ debug_printf("%s: %s = %li\n", __FUNCTION__, name, result);
+
+ return result;
}
@@ -339,10 +366,12 @@ static const struct debug_named_value pipe_format_names[] = {
DEBUG_NAMED_VALUE(PIPE_FORMAT_A1R5G5B5_UNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_A4R4G4B4_UNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R5G6B5_UNORM),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_A2B10G10R10_UNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_UNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_UNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_I8_UNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_A8L8_UNORM),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_L16_UNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_YCBCR),
DEBUG_NAMED_VALUE(PIPE_FORMAT_YCBCR_REV),
DEBUG_NAMED_VALUE(PIPE_FORMAT_Z16_UNORM),
@@ -408,6 +437,9 @@ static const struct debug_named_value pipe_format_names[] = {
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SNORM),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_B6G5R5_SNORM),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_A8B8G8R8_SNORM),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_X8B8G8R8_SNORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_SSCALED),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_SSCALED),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SSCALED),
@@ -418,6 +450,8 @@ static const struct debug_named_value pipe_format_names[] = {
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_X8UB8UG8SR8S_NORM),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_B6UG5SR5S_NORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA),
diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c
index 78497c5f6a..ed18c6540e 100644
--- a/src/gallium/auxiliary/util/p_debug_mem.c
+++ b/src/gallium/auxiliary/util/p_debug_mem.c
@@ -75,6 +75,12 @@ struct debug_memory_header
unsigned magic;
};
+struct debug_memory_footer
+{
+ unsigned magic;
+};
+
+
static struct list_head list = { &list, &list };
static unsigned long last_no = 0;
@@ -98,14 +104,24 @@ data_from_header(struct debug_memory_header *hdr)
return NULL;
}
+static INLINE struct debug_memory_footer *
+footer_from_header(struct debug_memory_header *hdr)
+{
+ if(hdr)
+ return (struct debug_memory_footer *)((char *)hdr + sizeof(struct debug_memory_header) + hdr->size);
+ else
+ return NULL;
+}
+
void *
debug_malloc(const char *file, unsigned line, const char *function,
size_t size)
{
struct debug_memory_header *hdr;
+ struct debug_memory_footer *ftr;
- hdr = real_malloc(sizeof(*hdr) + size);
+ hdr = real_malloc(sizeof(*hdr) + size + sizeof(*ftr));
if(!hdr)
return NULL;
@@ -116,6 +132,9 @@ debug_malloc(const char *file, unsigned line, const char *function,
hdr->size = size;
hdr->magic = DEBUG_MEMORY_MAGIC;
+ ftr = footer_from_header(hdr);
+ ftr->magic = DEBUG_MEMORY_MAGIC;
+
LIST_ADDTAIL(&hdr->head, &list);
return data_from_header(hdr);
@@ -126,6 +145,7 @@ debug_free(const char *file, unsigned line, const char *function,
void *ptr)
{
struct debug_memory_header *hdr;
+ struct debug_memory_footer *ftr;
if(!ptr)
return;
@@ -139,8 +159,17 @@ debug_free(const char *file, unsigned line, const char *function,
return;
}
+ ftr = footer_from_header(hdr);
+ if(ftr->magic != DEBUG_MEMORY_MAGIC) {
+ debug_printf("%s:%u:%s: buffer overflow %p\n",
+ hdr->file, hdr->line, hdr->function,
+ ptr);
+ debug_assert(0);
+ }
+
LIST_DEL(&hdr->head);
hdr->magic = 0;
+ ftr->magic = 0;
real_free(hdr);
}
@@ -160,6 +189,7 @@ debug_realloc(const char *file, unsigned line, const char *function,
void *old_ptr, size_t old_size, size_t new_size )
{
struct debug_memory_header *old_hdr, *new_hdr;
+ struct debug_memory_footer *old_ftr, *new_ftr;
void *new_ptr;
if(!old_ptr)
@@ -179,8 +209,16 @@ debug_realloc(const char *file, unsigned line, const char *function,
return NULL;
}
+ old_ftr = footer_from_header(old_hdr);
+ if(old_ftr->magic != DEBUG_MEMORY_MAGIC) {
+ debug_printf("%s:%u:%s: buffer overflow %p\n",
+ old_hdr->file, old_hdr->line, old_hdr->function,
+ old_ptr);
+ debug_assert(0);
+ }
+
/* alloc new */
- new_hdr = real_malloc(sizeof(*new_hdr) + new_size);
+ new_hdr = real_malloc(sizeof(*new_hdr) + new_size + sizeof(*new_ftr));
if(!new_hdr)
return NULL;
new_hdr->no = old_hdr->no;
@@ -189,14 +227,19 @@ debug_realloc(const char *file, unsigned line, const char *function,
new_hdr->function = old_hdr->function;
new_hdr->size = new_size;
new_hdr->magic = DEBUG_MEMORY_MAGIC;
- LIST_REPLACE(&old_hdr->head, &new_hdr->head);
+ new_ftr = footer_from_header(new_hdr);
+ new_ftr->magic = DEBUG_MEMORY_MAGIC;
+
+ LIST_REPLACE(&old_hdr->head, &new_hdr->head);
+
/* copy data */
new_ptr = data_from_header(new_hdr);
memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size );
/* free old */
old_hdr->magic = 0;
+ old_ftr->magic = 0;
real_free(old_hdr);
return new_ptr;
@@ -220,8 +263,8 @@ debug_memory_end(unsigned long start_no)
void *ptr;
hdr = LIST_ENTRY(struct debug_memory_header, entry, head);
ptr = data_from_header(hdr);
- if(start_no <= hdr->no && hdr->no < last_no ||
- last_no < start_no && (hdr->no < last_no || start_no <= hdr->no)) {
+ if((start_no <= hdr->no && hdr->no < last_no) ||
+ (last_no < start_no && (hdr->no < last_no || start_no <= hdr->no))) {
debug_printf("%s:%u:%s: %u bytes at %p not freed\n",
hdr->file, hdr->line, hdr->function,
hdr->size, ptr);
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 999a3e5099..7b9415d49a 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -281,7 +281,7 @@ util_blit_pixels(struct blit_state *ctx,
texTemp.height[0] = srcH;
texTemp.depth[0] = 1;
texTemp.compressed = 0;
- texTemp.cpp = pf_get_bits(src->format) / 8;
+ texTemp.cpp = pf_get_size(src->format);
tex = screen->texture_create(screen, &texTemp);
if (!tex)
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 655e2c8259..06abb34d5a 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -255,20 +255,28 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
static INLINE uint
util_pack_z(enum pipe_format format, double z)
{
+ if (z == 0.0)
+ return 0;
+
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
+ if (z == 1.0)
+ return 0xffff;
return (uint) (z * 0xffff);
case PIPE_FORMAT_Z32_UNORM:
/* special-case to avoid overflow */
if (z == 1.0)
return 0xffffffff;
- else
- return (uint) (z * 0xffffffff);
+ return (uint) (z * 0xffffffff);
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
+ if (z == 1.0)
+ return 0xffffff;
return (uint) (z * 0xffffff);
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
+ if (z == 1.0)
+ return 0xffffff00;
return ((uint) (z * 0xffffff)) << 8;
default:
debug_print_format("gallium: unhandled format in util_pack_z()", format);
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 5f8d12191d..505d93d727 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -92,8 +92,8 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
decl.Semantic.SemanticName = semantic_names[i];
decl.Semantic.SemanticIndex = semantic_indexes[i];
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = i;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = i;
ti += tgsi_build_full_declaration(&decl,
&tokens[ti],
header,
@@ -107,8 +107,8 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
decl.Declaration.Semantic = 1;
decl.Semantic.SemanticName = semantic_names[i];
decl.Semantic.SemanticIndex = semantic_indexes[i];
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = i;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = i;
ti += tgsi_build_full_declaration(&decl,
&tokens[ti],
header,
@@ -190,14 +190,13 @@ util_make_fragment_tex_shader(struct pipe_context *pipe,
/* declare TEX[0] input */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_INPUT;
+ /* XXX this could be linear... */
+ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
decl.Declaration.Semantic = 1;
decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
decl.Semantic.SemanticIndex = 0;
- /* XXX this could be linear... */
- decl.Declaration.Interpolate = 1;
- decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = 0;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = 0;
ti += tgsi_build_full_declaration(&decl,
&tokens[ti],
header,
@@ -209,8 +208,8 @@ util_make_fragment_tex_shader(struct pipe_context *pipe,
decl.Declaration.Semantic = 1;
decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR;
decl.Semantic.SemanticIndex = 0;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = 0;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = 0;
ti += tgsi_build_full_declaration(&decl,
&tokens[ti],
header,
@@ -219,8 +218,8 @@ util_make_fragment_tex_shader(struct pipe_context *pipe,
/* declare sampler */
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_SAMPLER;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = 0;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = 0;
ti += tgsi_build_full_declaration(&decl,
&tokens[ti],
header,
@@ -303,8 +302,8 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe,
decl.Declaration.Semantic = 1;
decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR;
decl.Semantic.SemanticIndex = 0;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = 0;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = 0;
ti += tgsi_build_full_declaration(&decl,
&tokens[ti],
header,
@@ -316,8 +315,8 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe,
decl.Declaration.Semantic = 1;
decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR;
decl.Semantic.SemanticIndex = 0;
- decl.u.DeclarationRange.First =
- decl.u.DeclarationRange.Last = 0;
+ decl.DeclarationRange.First =
+ decl.DeclarationRange.Last = 0;
ti += tgsi_build_full_declaration(&decl,
&tokens[ti],
header,
diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c
index 9b97050d51..49dce75289 100644
--- a/src/gallium/auxiliary/util/u_time.c
+++ b/src/gallium/auxiliary/util/u_time.c
@@ -33,26 +33,44 @@
*/
-#include "util/u_time.h"
+#include "pipe/p_config.h"
#if defined(PIPE_OS_LINUX)
#include <sys/time.h>
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
#include <windows.h>
#include <winddi.h>
-#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+#include <windows.h>
+extern VOID KeQuerySystemTime(PLARGE_INTEGER);
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
#include <windows.h>
#else
#error Unsupported OS
#endif
+#include "util/u_time.h"
+
-#if defined(PIPE_OS_WINDOWS)
-static LONGLONG frequency = 0;
-#if !defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
-#define EngQueryPerformanceFrequency(p) QueryPerformanceFrequency((LARGE_INTEGER*)(p))
-#define EngQueryPerformanceCounter(p) QueryPerformanceCounter((LARGE_INTEGER*)(p))
+#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+
+static int64_t frequency = 0;
+
+static INLINE void
+util_time_get_frequency(void)
+{
+ if(!frequency) {
+#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
+ LONGLONG temp;
+ EngQueryPerformanceFrequency(&temp);
+ frequency = temp;
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+ LARGE_INTEGER temp;
+ QueryPerformanceFrequency(&temp);
+ frequency = temp.QuadPart;
#endif
+ }
+}
#endif
@@ -61,8 +79,20 @@ util_time_get(struct util_time *t)
{
#if defined(PIPE_OS_LINUX)
gettimeofday(&t->tv, NULL);
-#elif defined(PIPE_OS_WINDOWS)
- EngQueryPerformanceCounter(&t->counter);
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
+ LONGLONG temp;
+ EngQueryPerformanceCounter(&temp);
+ t->counter = temp;
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+ /* Updated every 10 milliseconds, measured in units of 100 nanoseconds.
+ * http://msdn.microsoft.com/en-us/library/ms801642.aspx */
+ LARGE_INTEGER temp;
+ KeQuerySystemTime(&temp);
+ t->counter = temp.QuadPart;
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+ LARGE_INTEGER temp;
+ QueryPerformanceCounter(&temp);
+ t->counter = temp.QuadPart;
#endif
}
@@ -75,10 +105,17 @@ util_time_add(const struct util_time *t1,
#if defined(PIPE_OS_LINUX)
t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000;
t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000;
-#elif defined(PIPE_OS_WINDOWS)
- if(!frequency)
- EngQueryPerformanceFrequency(&frequency);
- t2->counter = t1->counter + (usecs * frequency + 999999LL)/1000000LL;
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+ util_time_get_frequency();
+ t2->counter = t1->counter + (usecs * frequency + INT64_C(999999))/INT64_C(1000000);
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+ /* 1 tick = 100 nanoseconds. */
+ t2->counter = t1->counter + usecs * 10;
+#elif
+ LARGE_INTEGER temp;
+ LONGLONG freq;
+ freq = temp.QuadPart;
+ t2->counter = t1->counter + (usecs * freq)/1000000L;
#endif
}
@@ -90,10 +127,11 @@ util_time_diff(const struct util_time *t1,
#if defined(PIPE_OS_LINUX)
return (t2->tv.tv_usec - t1->tv.tv_usec) +
(t2->tv.tv_sec - t1->tv.tv_sec)*1000000;
-#elif defined(PIPE_OS_WINDOWS)
- if(!frequency)
- EngQueryPerformanceFrequency(&frequency);
- return (t2->counter - t1->counter)*1000000LL/frequency;
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+ util_time_get_frequency();
+ return (t2->counter - t1->counter)*INT64_C(1000000)/frequency;
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+ return (t2->counter - t1->counter)/10;
#endif
}
@@ -142,7 +180,7 @@ util_time_timeout(const struct util_time *start,
}
-#if defined(PIPE_OS_WINDOWS)
+#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
void util_time_sleep(unsigned usecs)
{
LONGLONG start, curr, end;
diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h
index 48ec7a4a96..f9963ce0e2 100644
--- a/src/gallium/auxiliary/util/u_time.h
+++ b/src/gallium/auxiliary/util/u_time.h
@@ -61,7 +61,7 @@ struct util_time
#if defined(PIPE_OS_LINUX)
struct timeval tv;
#else
- long long counter;
+ int64_t counter;
#endif
};