summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/draw
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r--src/gallium/auxiliary/draw/Makefile1
-rw-r--r--src/gallium/auxiliary/draw/SConscript1
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c15
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h8
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c98
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c69
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c76
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c105
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_vbuf.h21
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c7
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c42
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c15
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c244
23 files changed, 544 insertions, 210 deletions
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index f2e36a89e9..bdbf5a08ed 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -40,6 +40,7 @@ C_SOURCES = \
draw_vs_aos_machine.c \
draw_vs_exec.c \
draw_vs_llvm.c \
+ draw_vs_ppc.c \
draw_vs_sse.c
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 544a04918b..5f05aa324a 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -38,6 +38,7 @@ draw = env.ConvenienceLibrary(
'draw_vs_aos_machine.c',
'draw_vs_exec.c',
'draw_vs_llvm.c',
+ 'draw_vs_ppc.c',
'draw_vs_sse.c',
'draw_vs_varient.c'
])
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index fab8fc95fc..7bd4a2e221 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -343,6 +343,21 @@ draw_num_vs_outputs(const struct draw_context *draw)
}
+/**
+ * Provide TGSI sampler objects for vertex shaders that use texture fetches.
+ * This might only be used by software drivers for the time being.
+ */
+void
+draw_texture_samplers(struct draw_context *draw,
+ uint num_samplers,
+ struct tgsi_sampler **samplers)
+{
+ draw->vs.num_samplers = num_samplers;
+ draw->vs.samplers = samplers;
+}
+
+
+
void draw_set_render( struct draw_context *draw,
struct vbuf_render *render )
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index a29bb01d81..d529e4e9a2 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -45,7 +45,7 @@ struct pipe_context;
struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
-
+struct tgsi_sampler;
struct draw_context *draw_create( void );
@@ -92,6 +92,12 @@ uint
draw_num_vs_outputs(const struct draw_context *draw);
+void
+draw_texture_samplers(struct draw_context *draw,
+ uint num_samplers,
+ struct tgsi_sampler **samplers);
+
+
/*
* Vertex shader functions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index b764d9c518..a0f9716dac 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
struct tgsi_full_immediate immed;
uint size = 4;
immed = tgsi_default_full_immediate();
- immed.Immediate.Size = 1 + size; /* one for the token itself */
+ immed.Immediate.NrTokens = 1 + size; /* one for the token itself */
immed.u.Pointer = (void *) value;
ctx->emit_immediate(ctx, &immed);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 9825e116c3..12325d30d6 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -29,12 +29,12 @@
* \file
* Vertex buffer drawing stage.
*
- * \author José Fonseca <jrfonsec@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonsec@tungstengraphics.com>
* \author Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -93,7 +93,6 @@ vbuf_stage( struct draw_stage *stage )
}
-static void vbuf_flush_indices( struct vbuf_stage *vbuf );
static void vbuf_flush_vertices( struct vbuf_stage *vbuf );
static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
@@ -109,13 +108,12 @@ overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
static INLINE void
check_space( struct vbuf_stage *vbuf, unsigned nr )
{
- if (vbuf->nr_vertices + nr > vbuf->max_vertices ) {
- vbuf_flush_vertices(vbuf);
- vbuf_alloc_vertices(vbuf);
+ if (vbuf->nr_vertices + nr > vbuf->max_vertices ||
+ vbuf->nr_indices + nr > vbuf->max_indices)
+ {
+ vbuf_flush_vertices( vbuf );
+ vbuf_alloc_vertices( vbuf );
}
-
- if (vbuf->nr_indices + nr > vbuf->max_indices )
- vbuf_flush_indices(vbuf);
}
@@ -202,7 +200,7 @@ vbuf_point( struct draw_stage *stage,
* will be flushed if needed and a new one allocated.
*/
static void
-vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
+vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
{
struct translate_key hw_key;
unsigned dst_offset;
@@ -217,11 +215,7 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
* state change.
*/
vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
-
- if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) {
- vbuf_flush_vertices(vbuf);
- vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
- }
+ vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
/* Translate from pipeline vertices to hw vertices.
*/
@@ -294,8 +288,8 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
/* Allocate new buffer?
*/
- if (!vbuf->vertices)
- vbuf_alloc_vertices(vbuf);
+ assert(vbuf->vertices == NULL);
+ vbuf_alloc_vertices(vbuf);
}
@@ -305,9 +299,9 @@ vbuf_first_tri( struct draw_stage *stage,
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices( vbuf );
+ vbuf_start_prim(vbuf, PIPE_PRIM_TRIANGLES);
stage->tri = vbuf_tri;
- vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES);
stage->tri( stage, prim );
}
@@ -318,9 +312,9 @@ vbuf_first_line( struct draw_stage *stage,
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices( vbuf );
+ vbuf_start_prim(vbuf, PIPE_PRIM_LINES);
stage->line = vbuf_line;
- vbuf_set_prim(vbuf, PIPE_PRIM_LINES);
stage->line( stage, prim );
}
@@ -331,53 +325,42 @@ vbuf_first_point( struct draw_stage *stage,
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices(vbuf);
+ vbuf_start_prim(vbuf, PIPE_PRIM_POINTS);
stage->point = vbuf_point;
- vbuf_set_prim(vbuf, PIPE_PRIM_POINTS);
stage->point( stage, prim );
}
-static void
-vbuf_flush_indices( struct vbuf_stage *vbuf )
-{
- if(!vbuf->nr_indices)
- return;
-
- assert((uint) (vbuf->vertex_ptr - vbuf->vertices) ==
- vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned));
-
- vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices);
-
- vbuf->nr_indices = 0;
-}
-
/**
* Flush existing vertex buffer and allocate a new one.
- *
- * XXX: We separate flush-on-index-full and flush-on-vb-full, but may
- * raise issues uploading vertices if the hardware wants to flush when
- * we flush.
*/
static void
vbuf_flush_vertices( struct vbuf_stage *vbuf )
{
- if(vbuf->vertices) {
- vbuf_flush_indices(vbuf);
-
+ if(vbuf->vertices) {
+
+ vbuf->render->unmap_vertices( vbuf->render, 0, vbuf->nr_vertices - 1 );
+
+ if (vbuf->nr_indices)
+ {
+ vbuf->render->draw(vbuf->render,
+ vbuf->indices,
+ vbuf->nr_indices );
+
+ vbuf->nr_indices = 0;
+ }
+
/* Reset temporary vertices ids */
if(vbuf->nr_vertices)
draw_reset_vertex_ids( vbuf->stage.draw );
/* Free the vertex buffer */
- vbuf->render->release_vertices(vbuf->render,
- vbuf->vertices,
- vbuf->vertex_size,
- vbuf->nr_vertices);
+ vbuf->render->release_vertices( vbuf->render );
+
vbuf->max_vertices = vbuf->nr_vertices = 0;
vbuf->vertex_ptr = vbuf->vertices = NULL;
-
}
}
@@ -394,14 +377,20 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* even number */
vbuf->max_vertices = vbuf->max_vertices & ~1;
+ if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
+ vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
+
/* Must always succeed -- driver gives us a
* 'max_vertex_buffer_bytes' which it guarantees it can allocate,
* and it will flush itself if necessary to do so. If this does
* fail, we are basically without usable hardware.
*/
- vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render,
- (ushort) vbuf->vertex_size,
- (ushort) vbuf->max_vertices);
+ vbuf->render->allocate_vertices(vbuf->render,
+ (ushort) vbuf->vertex_size,
+ (ushort) vbuf->max_vertices);
+
+ vbuf->vertices = (uint *) vbuf->render->map_vertices( vbuf->render );
+
vbuf->vertex_ptr = vbuf->vertices;
}
@@ -412,14 +401,11 @@ vbuf_flush( struct draw_stage *stage, unsigned flags )
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices( vbuf );
stage->point = vbuf_first_point;
stage->line = vbuf_first_line;
stage->tri = vbuf_first_tri;
-
- if (flags & DRAW_FLUSH_BACKEND)
- vbuf_flush_vertices( vbuf );
}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index a16b45d340..81e4eae401 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -187,6 +187,9 @@ struct draw_context
/** TGSI program interpreter runtime state */
struct tgsi_exec_machine machine;
+ uint num_samplers;
+ struct tgsi_sampler **samplers;
+
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
*/
struct gallivm_cpu_engine *engine;
@@ -198,7 +201,7 @@ struct draw_context
const float (*aligned_constants)[4];
- float (*aligned_constant_storage)[4];
+ const float (*aligned_constant_storage)[4];
unsigned const_storage_size;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 18f24e5980..4e5ffa0930 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -228,7 +228,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
for (j = 0; j < draw->pt.nr_vertex_elements; j++) {
uint buf = draw->pt.vertex_element[j].vertex_buffer_index;
ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf];
- ptr += draw->pt.vertex_buffer[buf].pitch * ii;
+ ptr += draw->pt.vertex_buffer[buf].stride * ii;
ptr += draw->pt.vertex_element[j].src_offset;
debug_printf(" Attr %u: ", j);
@@ -301,8 +301,8 @@ draw_arrays(struct draw_context *draw, unsigned prim,
}
debug_printf("Buffers:\n");
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
- debug_printf(" pitch=%u offset=%u ptr=%p\n",
- draw->pt.vertex_buffer[i].pitch,
+ debug_printf(" stride=%u offset=%u ptr=%p\n",
+ draw->pt.vertex_buffer[i].stride,
draw->pt.vertex_buffer[i].buffer_offset,
draw->pt.user.vbuffer[i]);
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index c02f229110..aecaeee5b9 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -173,9 +173,7 @@ void draw_pt_emit( struct pt_emit *emit,
void draw_pt_emit_linear( struct pt_emit *emit,
const float (*vertex_data)[4],
- unsigned vertex_count,
unsigned stride,
- unsigned start,
unsigned count );
void draw_pt_emit_destroy( struct pt_emit *emit );
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index d520b05869..064e16c295 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -165,6 +165,14 @@ void draw_pt_emit( struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ if (vertex_count == 0)
+ return;
+
+ if (vertex_count >= UNDEFINED_VERTEX_ID) {
+ assert(0);
+ return;
+ }
+
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -173,9 +181,11 @@ void draw_pt_emit( struct pt_emit *emit,
return;
}
- hw_verts = render->allocate_vertices(render,
- (ushort)translate->key.output_stride,
- (ushort)vertex_count);
+ render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)vertex_count);
+
+ hw_verts = render->map_vertices( render );
if (!hw_verts) {
assert(0);
return;
@@ -196,22 +206,21 @@ void draw_pt_emit( struct pt_emit *emit,
vertex_count,
hw_verts );
+ render->unmap_vertices( render,
+ 0,
+ vertex_count - 1 );
+
render->draw(render,
elts,
count);
- render->release_vertices(render,
- hw_verts,
- translate->key.output_stride,
- vertex_count);
+ render->release_vertices(render);
}
void draw_pt_emit_linear(struct pt_emit *emit,
const float (*vertex_data)[4],
- unsigned vertex_count,
unsigned stride,
- unsigned start,
unsigned count)
{
struct draw_context *draw = emit->draw;
@@ -226,21 +235,23 @@ void draw_pt_emit_linear(struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
- if (!draw->render->set_primitive(draw->render, emit->prim)) {
- assert(0);
- return;
- }
+ if (!draw->render->set_primitive(draw->render, emit->prim))
+ goto fail;
- hw_verts = render->allocate_vertices(render,
- (ushort)translate->key.output_stride,
- (ushort)count);
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (!render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)count))
+ goto fail;
+
+ hw_verts = render->map_vertices( render );
+ if (!hw_verts)
+ goto fail;
translate->set_buffer(translate, 0,
vertex_data, stride);
@@ -251,12 +262,12 @@ void draw_pt_emit_linear(struct pt_emit *emit,
translate->run(translate,
0,
- vertex_count,
+ count,
hw_verts);
if (0) {
unsigned i;
- for (i = 0; i < vertex_count; i++) {
+ for (i = 0; i < count; i++) {
debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
draw_dump_emitted_vertex( emit->vinfo,
(const uint8_t *)hw_verts +
@@ -264,13 +275,17 @@ void draw_pt_emit_linear(struct pt_emit *emit,
}
}
+ render->unmap_vertices( render, 0, count - 1 );
+
+ render->draw_arrays(render, 0, count);
+
+ render->release_vertices(render);
- render->draw_arrays(render, start, count);
+ return;
- render->release_vertices(render,
- hw_verts,
- translate->key.output_stride,
- vertex_count);
+fail:
+ assert(0);
+ return;
}
struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 6377f896fb..058caf7dcc 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -144,7 +144,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
translate->run_elts( translate,
@@ -180,7 +180,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
translate->run( translate,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 3966ad48ba..6b7d02a19b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -195,7 +195,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
@@ -229,9 +229,16 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->translate->key.output_stride,
- (ushort)fetch_count );
+ if (fetch_count >= UNDEFINED_VERTEX_ID) {
+ assert(0);
+ return;
+ }
+
+ draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)fetch_count );
+
+ hw_verts = draw->render->map_vertices( draw->render );
if (!hw_verts) {
assert(0);
return;
@@ -254,6 +261,10 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
}
}
+ draw->render->unmap_vertices( draw->render,
+ 0,
+ (ushort)(fetch_count - 1) );
+
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
@@ -263,10 +274,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
/* Done -- that was easy, wasn't it:
*/
- draw->render->release_vertices( draw->render,
- hw_verts,
- feme->translate->key.output_stride,
- fetch_count );
+ draw->render->release_vertices( draw->render );
}
@@ -283,13 +291,17 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->translate->key.output_stride,
- (ushort)count );
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
/* Single routine to fetch vertices and emit HW verts.
*/
@@ -307,20 +319,21 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
}
}
+ draw->render->unmap_vertices( draw->render, 0, count - 1 );
+
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
- draw->render->draw_arrays( draw->render,
- 0, /*start*/
- count );
+ draw->render->draw_arrays( draw->render, 0, count );
/* Done -- that was easy, wasn't it:
*/
- draw->render->release_vertices( draw->render,
- hw_verts,
- feme->translate->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
+ return;
+fail:
+ assert(0);
+ return;
}
@@ -338,9 +351,15 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->translate->key.output_stride,
- (ushort)count );
+ if (count >= UNDEFINED_VERTEX_ID)
+ return FALSE;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)count ))
+ return FALSE;
+
+ hw_verts = draw->render->map_vertices( draw->render );
if (!hw_verts)
return FALSE;
@@ -351,6 +370,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
count,
hw_verts );
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
@@ -360,10 +381,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
/* Done -- that was easy, wasn't it:
*/
- draw->render->release_vertices( draw->render,
- hw_verts,
- feme->translate->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index f7e6a1a8ee..cd9cd4b53f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -121,7 +121,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
}
for (i = 0; i < 5 && i < nr_vbs; i++) {
- if (draw->pt.vertex_buffer[i].pitch == 0)
+ if (draw->pt.vertex_buffer[i].stride == 0)
fse->key.const_vbuffers |= (1<<i);
}
@@ -189,7 +189,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
i,
((const ubyte *) draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
@@ -234,14 +234,17 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)count );
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
/* Single routine to fetch vertices, run shader and emit HW verts.
* Clipping is done elsewhere -- either by the API or on hardware,
@@ -251,13 +254,7 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
start, count,
hw_verts );
- /* Draw arrays path to avoid re-emitting index list again and
- * again.
- */
- draw->render->draw_arrays( draw->render,
- 0,
- count );
-
+
if (0) {
unsigned i;
for (i = 0; i < count; i++) {
@@ -269,12 +266,24 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
(const uint8_t *)hw_verts + fse->key.output_stride * i );
}
}
+
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+ /* Draw arrays path to avoid re-emitting index list again and
+ * again.
+ */
+ draw->render->draw_arrays( draw->render,
+ 0,
+ count );
+
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
+
+ return;
+
+fail:
+ assert(0);
+ return;
}
@@ -293,13 +302,17 @@ fse_run(struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)fetch_count );
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (fetch_count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)fetch_count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
/* Single routine to fetch vertices, run shader and emit HW verts.
@@ -309,9 +322,6 @@ fse_run(struct draw_pt_middle_end *middle,
fetch_count,
hw_verts );
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
if (0) {
unsigned i;
@@ -323,12 +333,19 @@ fse_run(struct draw_pt_middle_end *middle,
}
}
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) );
+
+ draw->render->draw( draw->render,
+ draw_elts,
+ draw_count );
+
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- fetch_count );
+ draw->render->release_vertices( draw->render );
+ return;
+fail:
+ assert(0);
+ return;
}
@@ -347,13 +364,17 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)count );
+ if (count >= UNDEFINED_VERTEX_ID)
+ return FALSE;
- if (!hw_verts) {
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)count ))
+ return FALSE;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
return FALSE;
- }
/* Single routine to fetch vertices, run shader and emit HW verts.
* Clipping is done elsewhere -- either by the API or on hardware,
@@ -369,11 +390,9 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
draw_count );
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index ec3b41c320..38f9b604d3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -251,9 +251,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
else {
draw_pt_emit_linear( fpme->emit,
(const float (*)[4])pipeline_verts->data,
- count,
fpme->vertex_size,
- 0, /*start*/
count );
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index c15afe65f1..d0e16c9bc3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -67,7 +67,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray,
unsigned segment_count,
boolean end )
{
- assert(segment_count+1 < varray->fetch_max);
+ assert(segment_count < varray->fetch_max);
if (segment_count >= 1) {
unsigned nr = 0, i;
@@ -77,7 +77,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray,
if (end)
varray->fetch_elts[nr++] = start;
- assert(nr < FETCH_MAX);
+ assert(nr <= FETCH_MAX);
varray->middle->run(varray->middle,
varray->fetch_elts,
@@ -94,7 +94,7 @@ static void varray_fan_segment(struct varray_frontend *varray,
unsigned segment_start,
unsigned segment_count )
{
- assert(segment_count+1 < varray->fetch_max);
+ assert(segment_count < varray->fetch_max);
if (segment_count >= 2) {
unsigned nr = 0, i;
@@ -104,7 +104,7 @@ static void varray_fan_segment(struct varray_frontend *varray,
for (i = 0 ; i < segment_count; i++)
varray->fetch_elts[nr++] = start + segment_start + i;
- assert(nr < FETCH_MAX);
+ assert(nr <= FETCH_MAX);
varray->middle->run(varray->middle,
varray->fetch_elts,
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 80d7200ca6..5d268a2226 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -324,7 +324,7 @@ vcache_check_run( struct draw_pt_front_end *frontend,
unsigned fetch_count = max_index + 1 - min_index;
const ushort *transformed_elts;
ushort *storage = NULL;
- boolean ok;
+ boolean ok = FALSE;
if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
@@ -413,11 +413,12 @@ vcache_check_run( struct draw_pt_front_end *frontend,
transformed_elts = storage;
}
- ok = vcache->middle->run_linear_elts( vcache->middle,
- min_index, /* start */
- fetch_count,
- transformed_elts,
- draw_count );
+ if (fetch_count < UNDEFINED_VERTEX_ID)
+ ok = vcache->middle->run_linear_elts( vcache->middle,
+ min_index, /* start */
+ fetch_count,
+ transformed_elts,
+ draw_count );
FREE(storage);
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index 9ac068c47b..cccd3bf435 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -30,14 +30,17 @@
* Vertex buffer drawing stage.
*
* \author Keith Whitwell <keith@tungstengraphics.com>
- * \author José Fonseca <jrfonsec@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonsec@tungstengraphics.com>
*/
#ifndef DRAW_VBUF_H_
#define DRAW_VBUF_H_
+#include "pipe/p_compiler.h"
+
+struct pipe_rasterizer_state;
struct draw_context;
struct vertex_info;
@@ -77,9 +80,14 @@ struct vbuf_render {
* Hardware renderers will use ttm memory, others will just malloc
* something.
*/
- void *(*allocate_vertices)( struct vbuf_render *,
- ushort vertex_size,
- ushort nr_vertices );
+ boolean (*allocate_vertices)( struct vbuf_render *,
+ ushort vertex_size,
+ ushort nr_vertices );
+
+ void *(*map_vertices)( struct vbuf_render * );
+ void (*unmap_vertices)( struct vbuf_render *,
+ ushort min_index,
+ ushort max_index );
/**
* Notify the renderer of the current primitive when it changes.
@@ -106,10 +114,7 @@ struct vbuf_render {
/**
* Called when vbuf is done with this set of vertices:
*/
- void (*release_vertices)( struct vbuf_render *,
- void *vertices,
- unsigned vertex_size,
- unsigned vertices_used );
+ void (*release_vertices)( struct vbuf_render * );
void (*destroy)( struct vbuf_render * );
};
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index a943607d7e..c143cf2372 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -81,9 +81,9 @@ struct vertex_info
* memcmp() comparisons.
*/
struct {
- ubyte interp_mode:4; /**< INTERP_x */
- ubyte emit:4; /**< EMIT_x */
- ubyte src_index; /**< map to post-xform attribs */
+ unsigned interp_mode:4; /**< INTERP_x */
+ unsigned emit:4; /**< EMIT_x */
+ unsigned src_index:8; /**< map to post-xform attribs */
} attrib[PIPE_MAX_SHADER_INPUTS];
};
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 34adbd49b0..c057cd67fd 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -50,7 +50,7 @@ void draw_vs_set_constants( struct draw_context *draw,
const float (*constants)[4],
unsigned size )
{
- if (((unsigned)constants) & 0xf) {
+ if (((uintptr_t)constants) & 0xf) {
if (size > draw->vs.const_storage_size) {
if (draw->vs.aligned_constant_storage)
align_free((void *)draw->vs.aligned_constant_storage);
@@ -85,7 +85,10 @@ draw_create_vertex_shader(struct draw_context *draw,
if (!vs) {
vs = draw_create_vs_sse( draw, shader );
if (!vs) {
- vs = draw_create_vs_exec( draw, shader );
+ vs = draw_create_vs_ppc( draw, shader );
+ if (!vs) {
+ vs = draw_create_vs_exec( draw, shader );
+ }
}
}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 68c24abad3..89ae158751 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -158,6 +158,10 @@ draw_create_vs_sse(struct draw_context *draw,
const struct pipe_shader_state *templ);
struct draw_vertex_shader *
+draw_create_vs_ppc(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 875ecb92db..1fb69ef81a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -32,7 +32,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "pipe/p_shader_tokens.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
@@ -884,7 +884,7 @@ static void set_fpu_round_nearest( struct aos_compilation *cp )
}
}
-
+#if 0
static void x87_emit_ex2( struct aos_compilation *cp )
{
struct x86_reg st0 = x86_make_reg(file_x87, 0);
@@ -907,13 +907,17 @@ static void x87_emit_ex2( struct aos_compilation *cp )
assert( stack == cp->func->x87_stack);
}
+#endif
+#if 0
static void PIPE_CDECL print_reg( const char *msg,
const float *reg )
{
debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]);
}
+#endif
+#if 0
static void emit_print( struct aos_compilation *cp,
const char *message, /* must point to a static string! */
unsigned file,
@@ -965,6 +969,7 @@ static void emit_print( struct aos_compilation *cp,
/* Done...
*/
}
+#endif
/**
* The traditional instructions. All operate on internal registers
@@ -1103,7 +1108,7 @@ static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-
+#if 0
static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
@@ -1111,6 +1116,7 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst
x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
return TRUE;
}
+#endif
static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
@@ -1566,7 +1572,6 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst
*/
static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
-
if (0) {
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg r = aos_get_xmm_reg(cp);
@@ -1575,21 +1580,30 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
else {
- struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
- struct x86_reg r = aos_get_xmm_reg(cp);
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );
struct x86_reg one_point_five = x86_make_disp( neg_half, 4 );
struct x86_reg src = get_xmm_writable( cp, arg0 );
-
- sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */
- sse_mulss( cp->func, src, neg_half ); /* -.5 * a */
- sse_mulss( cp->func, src, r ); /* -.5 * a * r */
- sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */
- sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */
- sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movaps(cp->func, tmp, src);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, tmp, src);
+
+ sse_rsqrtss( cp->func, r, tmp ); /* rsqrtss(a) */
+ sse_mulss( cp->func, tmp, neg_half ); /* -.5 * a */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r * r */
+ sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */
+ sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */
store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+
return TRUE;
}
}
@@ -1877,7 +1891,7 @@ static boolean note_immediate( struct aos_compilation *cp,
unsigned pos = cp->num_immediates++;
unsigned j;
- for (j = 0; j < imm->Immediate.Size; j++) {
+ for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 82d27d4493..b3200df811 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -62,12 +62,15 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
{
struct exec_vertex_shader *evs = exec_vertex_shader(shader);
- /* specify the vertex program to interpret/execute */
- tgsi_exec_machine_bind_shader(evs->machine,
- shader->state.tokens,
- PIPE_MAX_SAMPLERS,
- NULL /*samplers*/ );
-
+ /* Specify the vertex program to interpret/execute.
+ * Avoid rebinding when possible.
+ */
+ if (evs->machine->Tokens != shader->state.tokens) {
+ tgsi_exec_machine_bind_shader(evs->machine,
+ shader->state.tokens,
+ draw->vs.num_samplers,
+ draw->vs.samplers);
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
new file mode 100644
index 0000000000..d35db57d57
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -0,0 +1,244 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_config.h"
+
+#include "draw_vs.h"
+
+#if defined(PIPE_ARCH_PPC)
+
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "rtasm/rtasm_cpu.h"
+#include "rtasm/rtasm_ppc.h"
+#include "tgsi/tgsi_ppc.h"
+#include "tgsi/tgsi_parse.h"
+
+
+
+typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4],
+ float (*outputs)[4][4],
+ float (*temps)[4][4],
+ float (*immeds)[4],
+ float (*consts)[4],
+ const float *builtins);
+
+
+struct draw_ppc_vertex_shader {
+ struct draw_vertex_shader base;
+ struct ppc_function ppc_program;
+
+ codegen_function func;
+};
+
+
+static void
+vs_ppc_prepare( struct draw_vertex_shader *base,
+ struct draw_context *draw )
+{
+ /* nothing */
+}
+
+
+/**
+ * Simplified vertex shader interface for the pt paths. Given the
+ * complexity of code-generating all the above operations together,
+ * it's time to try doing all the other stuff separately.
+ */
+static void
+vs_ppc_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base;
+ unsigned int i;
+
+#define MAX_VERTICES 4
+
+ /* loop over verts */
+ for (i = 0; i < count; i += MAX_VERTICES) {
+ const uint max_vertices = MIN2(MAX_VERTICES, count - i);
+ float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB;
+ float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB;
+ float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB;
+ uint attr;
+
+ /* convert (up to) four input verts to SoA format */
+ for (attr = 0; attr < base->info.num_inputs; attr++) {
+ const float *vIn = (const float *) input;
+ uint vert;
+ for (vert = 0; vert < max_vertices; vert++) {
+#if 0
+ if (attr==0)
+ printf("Input v%d a%d: %f %f %f %f\n",
+ vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]);
+#endif
+ inputs_soa[attr][0][vert] = vIn[attr * 4 + 0];
+ inputs_soa[attr][1][vert] = vIn[attr * 4 + 1];
+ inputs_soa[attr][2][vert] = vIn[attr * 4 + 2];
+ inputs_soa[attr][3][vert] = vIn[attr * 4 + 3];
+ vIn += input_stride / 4;
+ }
+ }
+
+ /* run compiled shader
+ */
+ shader->func(inputs_soa, outputs_soa, temps_soa,
+ (float (*)[4]) shader->base.immediates,
+ (float (*)[4]) constants,
+ ppc_builtin_constants);
+
+ /* convert (up to) four output verts from SoA back to AoS format */
+ for (attr = 0; attr < base->info.num_outputs; attr++) {
+ float *vOut = (float *) output;
+ uint vert;
+ for (vert = 0; vert < max_vertices; vert++) {
+ vOut[attr * 4 + 0] = outputs_soa[attr][0][vert];
+ vOut[attr * 4 + 1] = outputs_soa[attr][1][vert];
+ vOut[attr * 4 + 2] = outputs_soa[attr][2][vert];
+ vOut[attr * 4 + 3] = outputs_soa[attr][3][vert];
+#if 0
+ if (attr==0)
+ printf("Output v%d a%d: %f %f %f %f\n",
+ vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]);
+#endif
+ vOut += output_stride / 4;
+ }
+ }
+
+ /* advance to next group of four input/output verts */
+ input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
+ output = (float (*)[4])((char *)output + output_stride * max_vertices);
+ }
+}
+
+
+static void
+vs_ppc_delete( struct draw_vertex_shader *base )
+{
+ struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base;
+
+ ppc_release_func( &shader->ppc_program );
+
+ align_free( (void *) shader->base.immediates );
+
+ FREE( (void*) shader->base.state.tokens );
+ FREE( shader );
+}
+
+
+struct draw_vertex_shader *
+draw_create_vs_ppc(struct draw_context *draw,
+ const struct pipe_shader_state *templ)
+{
+ struct draw_ppc_vertex_shader *vs;
+
+ vs = CALLOC_STRUCT( draw_ppc_vertex_shader );
+ if (vs == NULL)
+ return NULL;
+
+ /* we make a private copy of the tokens */
+ vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+ if (!vs->base.state.tokens)
+ goto fail;
+
+ tgsi_scan_shader(templ->tokens, &vs->base.info);
+
+ vs->base.draw = draw;
+#if 0
+ if (1)
+ vs->base.create_varient = draw_vs_varient_aos_ppc;
+ else
+#endif
+ vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.prepare = vs_ppc_prepare;
+ vs->base.run_linear = vs_ppc_run_linear;
+ vs->base.delete = vs_ppc_delete;
+
+ vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
+ sizeof(float), 16);
+
+ ppc_init_func( &vs->ppc_program );
+
+#if 0
+ ppc_print_code(&vs->ppc_program, TRUE);
+ ppc_indent(&vs->ppc_program, 8);
+#endif
+
+ if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens,
+ &vs->ppc_program,
+ (float (*)[4]) vs->base.immediates,
+ TRUE ))
+ goto fail;
+
+ vs->func = (codegen_function) ppc_get_func( &vs->ppc_program );
+ if (!vs->func) {
+ goto fail;
+ }
+
+ return &vs->base;
+
+fail:
+ /*
+ debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n");
+ */
+
+ ppc_release_func( &vs->ppc_program );
+
+ FREE(vs);
+ return NULL;
+}
+
+
+
+#else /* PIPE_ARCH_PPC */
+
+
+struct draw_vertex_shader *
+draw_create_vs_ppc( struct draw_context *draw,
+ const struct pipe_shader_state *templ )
+{
+ return (void *) 0;
+}
+
+
+#endif /* PIPE_ARCH_PPC */