diff options
Diffstat (limited to 'src/gallium/auxiliary')
120 files changed, 13642 insertions, 1355 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 6b1754ea00..0bc77a5728 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -28,7 +28,7 @@ /* Authors: Zack Rusin <zack@tungstengraphics.com> */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 68508f24de..a9157aad71 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -783,7 +783,7 @@ copy_framebuffer_state(struct pipe_framebuffer_state *dst, dst->width = src->width; dst->height = src->height; - dst->num_cbufs = src->num_cbufs; + dst->nr_cbufs = src->nr_cbufs; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); } diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 7f0044c5a7..288cef7b6f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -30,7 +30,7 @@ * Zack Rusin <zack@tungstengraphics.com> */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "cso_hash.h" @@ -431,3 +431,9 @@ struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter --hash->data.d->size; return ret; } + +boolean cso_hash_contains(struct cso_hash *hash, unsigned key) +{ + struct cso_node **node = cso_hash_find_node(hash, key); + return (*node != hash->data.e); +} diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index 85f3e276c6..5891c325fa 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -44,6 +44,7 @@ #ifndef CSO_HASH_H #define CSO_HASH_H +#include "pipe/p_compiler.h" #ifdef __cplusplus extern "C" { @@ -95,6 +96,11 @@ struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash); */ struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key); +/** + * Returns true if a value with the given key exists in the hash + */ +boolean cso_hash_contains(struct cso_hash *hash, unsigned key); + int cso_hash_iter_is_null(struct cso_hash_iter iter); unsigned cso_hash_iter_key(struct cso_hash_iter iter); diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index f2e36a89e9..bdbf5a08ed 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -40,6 +40,7 @@ C_SOURCES = \ draw_vs_aos_machine.c \ draw_vs_exec.c \ draw_vs_llvm.c \ + draw_vs_ppc.c \ draw_vs_sse.c diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 544a04918b..5f05aa324a 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -38,6 +38,7 @@ draw = env.ConvenienceLibrary( 'draw_vs_aos_machine.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', + 'draw_vs_ppc.c', 'draw_vs_sse.c', 'draw_vs_varient.c' ]) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index fab8fc95fc..7bd4a2e221 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -343,6 +343,21 @@ draw_num_vs_outputs(const struct draw_context *draw) } +/** + * Provide TGSI sampler objects for vertex shaders that use texture fetches. + * This might only be used by software drivers for the time being. + */ +void +draw_texture_samplers(struct draw_context *draw, + uint num_samplers, + struct tgsi_sampler **samplers) +{ + draw->vs.num_samplers = num_samplers; + draw->vs.samplers = samplers; +} + + + void draw_set_render( struct draw_context *draw, struct vbuf_render *render ) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index a29bb01d81..d529e4e9a2 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -45,7 +45,7 @@ struct pipe_context; struct draw_context; struct draw_stage; struct draw_vertex_shader; - +struct tgsi_sampler; struct draw_context *draw_create( void ); @@ -92,6 +92,12 @@ uint draw_num_vs_outputs(const struct draw_context *draw); +void +draw_texture_samplers(struct draw_context *draw, + uint num_samplers, + struct tgsi_sampler **samplers); + + /* * Vertex shader functions diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index b764d9c518..a0f9716dac 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, struct tgsi_full_immediate immed; uint size = 4; immed = tgsi_default_full_immediate(); - immed.Immediate.Size = 1 + size; /* one for the token itself */ + immed.Immediate.NrTokens = 1 + size; /* one for the token itself */ immed.u.Pointer = (void *) value; ctx->emit_immediate(ctx, &immed); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 9825e116c3..12325d30d6 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -29,12 +29,12 @@ * \file * Vertex buffer drawing stage. * - * \author José Fonseca <jrfonsec@tungstengraphics.com> + * \author Jose Fonseca <jrfonsec@tungstengraphics.com> * \author Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -93,7 +93,6 @@ vbuf_stage( struct draw_stage *stage ) } -static void vbuf_flush_indices( struct vbuf_stage *vbuf ); static void vbuf_flush_vertices( struct vbuf_stage *vbuf ); static void vbuf_alloc_vertices( struct vbuf_stage *vbuf ); @@ -109,13 +108,12 @@ overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz ) static INLINE void check_space( struct vbuf_stage *vbuf, unsigned nr ) { - if (vbuf->nr_vertices + nr > vbuf->max_vertices ) { - vbuf_flush_vertices(vbuf); - vbuf_alloc_vertices(vbuf); + if (vbuf->nr_vertices + nr > vbuf->max_vertices || + vbuf->nr_indices + nr > vbuf->max_indices) + { + vbuf_flush_vertices( vbuf ); + vbuf_alloc_vertices( vbuf ); } - - if (vbuf->nr_indices + nr > vbuf->max_indices ) - vbuf_flush_indices(vbuf); } @@ -202,7 +200,7 @@ vbuf_point( struct draw_stage *stage, * will be flushed if needed and a new one allocated. */ static void -vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) +vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) { struct translate_key hw_key; unsigned dst_offset; @@ -217,11 +215,7 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) * state change. */ vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render); - - if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) { - vbuf_flush_vertices(vbuf); - vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); - } + vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); /* Translate from pipeline vertices to hw vertices. */ @@ -294,8 +288,8 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) /* Allocate new buffer? */ - if (!vbuf->vertices) - vbuf_alloc_vertices(vbuf); + assert(vbuf->vertices == NULL); + vbuf_alloc_vertices(vbuf); } @@ -305,9 +299,9 @@ vbuf_first_tri( struct draw_stage *stage, { struct vbuf_stage *vbuf = vbuf_stage( stage ); - vbuf_flush_indices( vbuf ); + vbuf_flush_vertices( vbuf ); + vbuf_start_prim(vbuf, PIPE_PRIM_TRIANGLES); stage->tri = vbuf_tri; - vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES); stage->tri( stage, prim ); } @@ -318,9 +312,9 @@ vbuf_first_line( struct draw_stage *stage, { struct vbuf_stage *vbuf = vbuf_stage( stage ); - vbuf_flush_indices( vbuf ); + vbuf_flush_vertices( vbuf ); + vbuf_start_prim(vbuf, PIPE_PRIM_LINES); stage->line = vbuf_line; - vbuf_set_prim(vbuf, PIPE_PRIM_LINES); stage->line( stage, prim ); } @@ -331,53 +325,42 @@ vbuf_first_point( struct draw_stage *stage, { struct vbuf_stage *vbuf = vbuf_stage( stage ); - vbuf_flush_indices( vbuf ); + vbuf_flush_vertices(vbuf); + vbuf_start_prim(vbuf, PIPE_PRIM_POINTS); stage->point = vbuf_point; - vbuf_set_prim(vbuf, PIPE_PRIM_POINTS); stage->point( stage, prim ); } -static void -vbuf_flush_indices( struct vbuf_stage *vbuf ) -{ - if(!vbuf->nr_indices) - return; - - assert((uint) (vbuf->vertex_ptr - vbuf->vertices) == - vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned)); - - vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices); - - vbuf->nr_indices = 0; -} - /** * Flush existing vertex buffer and allocate a new one. - * - * XXX: We separate flush-on-index-full and flush-on-vb-full, but may - * raise issues uploading vertices if the hardware wants to flush when - * we flush. */ static void vbuf_flush_vertices( struct vbuf_stage *vbuf ) { - if(vbuf->vertices) { - vbuf_flush_indices(vbuf); - + if(vbuf->vertices) { + + vbuf->render->unmap_vertices( vbuf->render, 0, vbuf->nr_vertices - 1 ); + + if (vbuf->nr_indices) + { + vbuf->render->draw(vbuf->render, + vbuf->indices, + vbuf->nr_indices ); + + vbuf->nr_indices = 0; + } + /* Reset temporary vertices ids */ if(vbuf->nr_vertices) draw_reset_vertex_ids( vbuf->stage.draw ); /* Free the vertex buffer */ - vbuf->render->release_vertices(vbuf->render, - vbuf->vertices, - vbuf->vertex_size, - vbuf->nr_vertices); + vbuf->render->release_vertices( vbuf->render ); + vbuf->max_vertices = vbuf->nr_vertices = 0; vbuf->vertex_ptr = vbuf->vertices = NULL; - } } @@ -394,14 +377,20 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) /* even number */ vbuf->max_vertices = vbuf->max_vertices & ~1; + if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID) + vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1; + /* Must always succeed -- driver gives us a * 'max_vertex_buffer_bytes' which it guarantees it can allocate, * and it will flush itself if necessary to do so. If this does * fail, we are basically without usable hardware. */ - vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, - (ushort) vbuf->vertex_size, - (ushort) vbuf->max_vertices); + vbuf->render->allocate_vertices(vbuf->render, + (ushort) vbuf->vertex_size, + (ushort) vbuf->max_vertices); + + vbuf->vertices = (uint *) vbuf->render->map_vertices( vbuf->render ); + vbuf->vertex_ptr = vbuf->vertices; } @@ -412,14 +401,11 @@ vbuf_flush( struct draw_stage *stage, unsigned flags ) { struct vbuf_stage *vbuf = vbuf_stage( stage ); - vbuf_flush_indices( vbuf ); + vbuf_flush_vertices( vbuf ); stage->point = vbuf_first_point; stage->line = vbuf_first_line; stage->tri = vbuf_first_tri; - - if (flags & DRAW_FLUSH_BACKEND) - vbuf_flush_vertices( vbuf ); } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index a16b45d340..81e4eae401 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -187,6 +187,9 @@ struct draw_context /** TGSI program interpreter runtime state */ struct tgsi_exec_machine machine; + uint num_samplers; + struct tgsi_sampler **samplers; + /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. */ struct gallivm_cpu_engine *engine; @@ -198,7 +201,7 @@ struct draw_context const float (*aligned_constants)[4]; - float (*aligned_constant_storage)[4]; + const float (*aligned_constant_storage)[4]; unsigned const_storage_size; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 18f24e5980..4e5ffa0930 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -228,7 +228,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) for (j = 0; j < draw->pt.nr_vertex_elements; j++) { uint buf = draw->pt.vertex_element[j].vertex_buffer_index; ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; - ptr += draw->pt.vertex_buffer[buf].pitch * ii; + ptr += draw->pt.vertex_buffer[buf].stride * ii; ptr += draw->pt.vertex_element[j].src_offset; debug_printf(" Attr %u: ", j); @@ -301,8 +301,8 @@ draw_arrays(struct draw_context *draw, unsigned prim, } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" pitch=%u offset=%u ptr=%p\n", - draw->pt.vertex_buffer[i].pitch, + debug_printf(" stride=%u offset=%u ptr=%p\n", + draw->pt.vertex_buffer[i].stride, draw->pt.vertex_buffer[i].buffer_offset, draw->pt.user.vbuffer[i]); } diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index c02f229110..aecaeee5b9 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -173,9 +173,7 @@ void draw_pt_emit( struct pt_emit *emit, void draw_pt_emit_linear( struct pt_emit *emit, const float (*vertex_data)[4], - unsigned vertex_count, unsigned stride, - unsigned start, unsigned count ); void draw_pt_emit_destroy( struct pt_emit *emit ); diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d520b05869..064e16c295 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -165,6 +165,14 @@ void draw_pt_emit( struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (vertex_count == 0) + return; + + if (vertex_count >= UNDEFINED_VERTEX_ID) { + assert(0); + return; + } + /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... */ @@ -173,9 +181,11 @@ void draw_pt_emit( struct pt_emit *emit, return; } - hw_verts = render->allocate_vertices(render, - (ushort)translate->key.output_stride, - (ushort)vertex_count); + render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)vertex_count); + + hw_verts = render->map_vertices( render ); if (!hw_verts) { assert(0); return; @@ -196,22 +206,21 @@ void draw_pt_emit( struct pt_emit *emit, vertex_count, hw_verts ); + render->unmap_vertices( render, + 0, + vertex_count - 1 ); + render->draw(render, elts, count); - render->release_vertices(render, - hw_verts, - translate->key.output_stride, - vertex_count); + render->release_vertices(render); } void draw_pt_emit_linear(struct pt_emit *emit, const float (*vertex_data)[4], - unsigned vertex_count, unsigned stride, - unsigned start, unsigned count) { struct draw_context *draw = emit->draw; @@ -226,21 +235,23 @@ void draw_pt_emit_linear(struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (count >= UNDEFINED_VERTEX_ID) + goto fail; + /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... */ - if (!draw->render->set_primitive(draw->render, emit->prim)) { - assert(0); - return; - } + if (!draw->render->set_primitive(draw->render, emit->prim)) + goto fail; - hw_verts = render->allocate_vertices(render, - (ushort)translate->key.output_stride, - (ushort)count); - if (!hw_verts) { - assert(0); - return; - } + if (!render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)count)) + goto fail; + + hw_verts = render->map_vertices( render ); + if (!hw_verts) + goto fail; translate->set_buffer(translate, 0, vertex_data, stride); @@ -251,12 +262,12 @@ void draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, - vertex_count, + count, hw_verts); if (0) { unsigned i; - for (i = 0; i < vertex_count; i++) { + for (i = 0; i < count; i++) { debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); draw_dump_emitted_vertex( emit->vinfo, (const uint8_t *)hw_verts + @@ -264,13 +275,17 @@ void draw_pt_emit_linear(struct pt_emit *emit, } } + render->unmap_vertices( render, 0, count - 1 ); + + render->draw_arrays(render, 0, count); + + render->release_vertices(render); - render->draw_arrays(render, start, count); + return; - render->release_vertices(render, - hw_verts, - translate->key.output_stride, - vertex_count); +fail: + assert(0); + return; } struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 6377f896fb..058caf7dcc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -144,7 +144,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } translate->run_elts( translate, @@ -180,7 +180,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } translate->run( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 3966ad48ba..6b7d02a19b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -195,7 +195,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / @@ -229,9 +229,16 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)feme->translate->key.output_stride, - (ushort)fetch_count ); + if (fetch_count >= UNDEFINED_VERTEX_ID) { + assert(0); + return; + } + + draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)fetch_count ); + + hw_verts = draw->render->map_vertices( draw->render ); if (!hw_verts) { assert(0); return; @@ -254,6 +261,10 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, } } + draw->render->unmap_vertices( draw->render, + 0, + (ushort)(fetch_count - 1) ); + /* XXX: Draw arrays path to avoid re-emitting index list again and * again. */ @@ -263,10 +274,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, /* Done -- that was easy, wasn't it: */ - draw->render->release_vertices( draw->render, - hw_verts, - feme->translate->key.output_stride, - fetch_count ); + draw->render->release_vertices( draw->render ); } @@ -283,13 +291,17 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)feme->translate->key.output_stride, - (ushort)count ); - if (!hw_verts) { - assert(0); - return; - } + if (count >= UNDEFINED_VERTEX_ID) + goto fail; + + if (!draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)count )) + goto fail; + + hw_verts = draw->render->map_vertices( draw->render ); + if (!hw_verts) + goto fail; /* Single routine to fetch vertices and emit HW verts. */ @@ -307,20 +319,21 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, } } + draw->render->unmap_vertices( draw->render, 0, count - 1 ); + /* XXX: Draw arrays path to avoid re-emitting index list again and * again. */ - draw->render->draw_arrays( draw->render, - 0, /*start*/ - count ); + draw->render->draw_arrays( draw->render, 0, count ); /* Done -- that was easy, wasn't it: */ - draw->render->release_vertices( draw->render, - hw_verts, - feme->translate->key.output_stride, - count ); + draw->render->release_vertices( draw->render ); + return; +fail: + assert(0); + return; } @@ -338,9 +351,15 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)feme->translate->key.output_stride, - (ushort)count ); + if (count >= UNDEFINED_VERTEX_ID) + return FALSE; + + if (!draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)count )) + return FALSE; + + hw_verts = draw->render->map_vertices( draw->render ); if (!hw_verts) return FALSE; @@ -351,6 +370,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, count, hw_verts ); + draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); + /* XXX: Draw arrays path to avoid re-emitting index list again and * again. */ @@ -360,10 +381,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, /* Done -- that was easy, wasn't it: */ - draw->render->release_vertices( draw->render, - hw_verts, - feme->translate->key.output_stride, - count ); + draw->render->release_vertices( draw->render ); return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index f7e6a1a8ee..cd9cd4b53f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -121,7 +121,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, } for (i = 0; i < 5 && i < nr_vbs; i++) { - if (draw->pt.vertex_buffer[i].pitch == 0) + if (draw->pt.vertex_buffer[i].stride == 0) fse->key.const_vbuffers |= (1<<i); } @@ -189,7 +189,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, i, ((const ubyte *) draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / @@ -234,14 +234,17 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)count ); + if (count >= UNDEFINED_VERTEX_ID) + goto fail; - if (!hw_verts) { - assert(0); - return; - } + if (!draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count )) + goto fail; + + hw_verts = draw->render->map_vertices( draw->render ); + if (!hw_verts) + goto fail; /* Single routine to fetch vertices, run shader and emit HW verts. * Clipping is done elsewhere -- either by the API or on hardware, @@ -251,13 +254,7 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, start, count, hw_verts ); - /* Draw arrays path to avoid re-emitting index list again and - * again. - */ - draw->render->draw_arrays( draw->render, - 0, - count ); - + if (0) { unsigned i; for (i = 0; i < count; i++) { @@ -269,12 +266,24 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, (const uint8_t *)hw_verts + fse->key.output_stride * i ); } } + + draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); + /* Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + 0, + count ); + - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - count ); + draw->render->release_vertices( draw->render ); + + return; + +fail: + assert(0); + return; } @@ -293,13 +302,17 @@ fse_run(struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)fetch_count ); - if (!hw_verts) { - assert(0); - return; - } + if (fetch_count >= UNDEFINED_VERTEX_ID) + goto fail; + + if (!draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)fetch_count )) + goto fail; + + hw_verts = draw->render->map_vertices( draw->render ); + if (!hw_verts) + goto fail; /* Single routine to fetch vertices, run shader and emit HW verts. @@ -309,9 +322,6 @@ fse_run(struct draw_pt_middle_end *middle, fetch_count, hw_verts ); - draw->render->draw( draw->render, - draw_elts, - draw_count ); if (0) { unsigned i; @@ -323,12 +333,19 @@ fse_run(struct draw_pt_middle_end *middle, } } + draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) ); + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - fetch_count ); + draw->render->release_vertices( draw->render ); + return; +fail: + assert(0); + return; } @@ -347,13 +364,17 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)count ); + if (count >= UNDEFINED_VERTEX_ID) + return FALSE; - if (!hw_verts) { + if (!draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count )) + return FALSE; + + hw_verts = draw->render->map_vertices( draw->render ); + if (!hw_verts) return FALSE; - } /* Single routine to fetch vertices, run shader and emit HW verts. * Clipping is done elsewhere -- either by the API or on hardware, @@ -369,11 +390,9 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, draw_count ); + draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - count ); + draw->render->release_vertices( draw->render ); return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index ec3b41c320..38f9b604d3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -251,9 +251,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, else { draw_pt_emit_linear( fpme->emit, (const float (*)[4])pipeline_verts->data, - count, fpme->vertex_size, - 0, /*start*/ count ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index c15afe65f1..d0e16c9bc3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -67,7 +67,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray, unsigned segment_count, boolean end ) { - assert(segment_count+1 < varray->fetch_max); + assert(segment_count < varray->fetch_max); if (segment_count >= 1) { unsigned nr = 0, i; @@ -77,7 +77,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray, if (end) varray->fetch_elts[nr++] = start; - assert(nr < FETCH_MAX); + assert(nr <= FETCH_MAX); varray->middle->run(varray->middle, varray->fetch_elts, @@ -94,7 +94,7 @@ static void varray_fan_segment(struct varray_frontend *varray, unsigned segment_start, unsigned segment_count ) { - assert(segment_count+1 < varray->fetch_max); + assert(segment_count < varray->fetch_max); if (segment_count >= 2) { unsigned nr = 0, i; @@ -104,7 +104,7 @@ static void varray_fan_segment(struct varray_frontend *varray, for (i = 0 ; i < segment_count; i++) varray->fetch_elts[nr++] = start + segment_start + i; - assert(nr < FETCH_MAX); + assert(nr <= FETCH_MAX); varray->middle->run(varray->middle, varray->fetch_elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 80d7200ca6..5d268a2226 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -324,7 +324,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, unsigned fetch_count = max_index + 1 - min_index; const ushort *transformed_elts; ushort *storage = NULL; - boolean ok; + boolean ok = FALSE; if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, @@ -413,11 +413,12 @@ vcache_check_run( struct draw_pt_front_end *frontend, transformed_elts = storage; } - ok = vcache->middle->run_linear_elts( vcache->middle, - min_index, /* start */ - fetch_count, - transformed_elts, - draw_count ); + if (fetch_count < UNDEFINED_VERTEX_ID) + ok = vcache->middle->run_linear_elts( vcache->middle, + min_index, /* start */ + fetch_count, + transformed_elts, + draw_count ); FREE(storage); diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 9ac068c47b..cccd3bf435 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -30,14 +30,17 @@ * Vertex buffer drawing stage. * * \author Keith Whitwell <keith@tungstengraphics.com> - * \author José Fonseca <jrfonsec@tungstengraphics.com> + * \author Jose Fonseca <jrfonsec@tungstengraphics.com> */ #ifndef DRAW_VBUF_H_ #define DRAW_VBUF_H_ +#include "pipe/p_compiler.h" + +struct pipe_rasterizer_state; struct draw_context; struct vertex_info; @@ -77,9 +80,14 @@ struct vbuf_render { * Hardware renderers will use ttm memory, others will just malloc * something. */ - void *(*allocate_vertices)( struct vbuf_render *, - ushort vertex_size, - ushort nr_vertices ); + boolean (*allocate_vertices)( struct vbuf_render *, + ushort vertex_size, + ushort nr_vertices ); + + void *(*map_vertices)( struct vbuf_render * ); + void (*unmap_vertices)( struct vbuf_render *, + ushort min_index, + ushort max_index ); /** * Notify the renderer of the current primitive when it changes. @@ -106,10 +114,7 @@ struct vbuf_render { /** * Called when vbuf is done with this set of vertices: */ - void (*release_vertices)( struct vbuf_render *, - void *vertices, - unsigned vertex_size, - unsigned vertices_used ); + void (*release_vertices)( struct vbuf_render * ); void (*destroy)( struct vbuf_render * ); }; diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index a943607d7e..c143cf2372 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -81,9 +81,9 @@ struct vertex_info * memcmp() comparisons. */ struct { - ubyte interp_mode:4; /**< INTERP_x */ - ubyte emit:4; /**< EMIT_x */ - ubyte src_index; /**< map to post-xform attribs */ + unsigned interp_mode:4; /**< INTERP_x */ + unsigned emit:4; /**< EMIT_x */ + unsigned src_index:8; /**< map to post-xform attribs */ } attrib[PIPE_MAX_SHADER_INPUTS]; }; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 34adbd49b0..c057cd67fd 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -50,7 +50,7 @@ void draw_vs_set_constants( struct draw_context *draw, const float (*constants)[4], unsigned size ) { - if (((unsigned)constants) & 0xf) { + if (((uintptr_t)constants) & 0xf) { if (size > draw->vs.const_storage_size) { if (draw->vs.aligned_constant_storage) align_free((void *)draw->vs.aligned_constant_storage); @@ -85,7 +85,10 @@ draw_create_vertex_shader(struct draw_context *draw, if (!vs) { vs = draw_create_vs_sse( draw, shader ); if (!vs) { - vs = draw_create_vs_exec( draw, shader ); + vs = draw_create_vs_ppc( draw, shader ); + if (!vs) { + vs = draw_create_vs_exec( draw, shader ); + } } } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 68c24abad3..89ae158751 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -158,6 +158,10 @@ draw_create_vs_sse(struct draw_context *draw, const struct pipe_shader_state *templ); struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ); + +struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 875ecb92db..1fb69ef81a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -32,7 +32,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" @@ -884,7 +884,7 @@ static void set_fpu_round_nearest( struct aos_compilation *cp ) } } - +#if 0 static void x87_emit_ex2( struct aos_compilation *cp ) { struct x86_reg st0 = x86_make_reg(file_x87, 0); @@ -907,13 +907,17 @@ static void x87_emit_ex2( struct aos_compilation *cp ) assert( stack == cp->func->x87_stack); } +#endif +#if 0 static void PIPE_CDECL print_reg( const char *msg, const float *reg ) { debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]); } +#endif +#if 0 static void emit_print( struct aos_compilation *cp, const char *message, /* must point to a static string! */ unsigned file, @@ -965,6 +969,7 @@ static void emit_print( struct aos_compilation *cp, /* Done... */ } +#endif /** * The traditional instructions. All operate on internal registers @@ -1103,7 +1108,7 @@ static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } - +#if 0 static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { x87_fld_src(cp, &op->FullSrcRegisters[0], 0); @@ -1111,6 +1116,7 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst x87_fstp_dest4(cp, &op->FullDstRegisters[0]); return TRUE; } +#endif static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) @@ -1566,7 +1572,6 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst */ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - if (0) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg r = aos_get_xmm_reg(cp); @@ -1575,21 +1580,30 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } else { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg r = aos_get_xmm_reg(cp); + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); struct x86_reg one_point_five = x86_make_disp( neg_half, 4 ); struct x86_reg src = get_xmm_writable( cp, arg0 ); - - sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */ - sse_mulss( cp->func, src, neg_half ); /* -.5 * a */ - sse_mulss( cp->func, src, r ); /* -.5 * a * r */ - sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */ - sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */ - sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */ + struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); + struct x86_reg tmp = aos_get_xmm_reg(cp); + + sse_movaps(cp->func, tmp, src); + sse_mulps(cp->func, tmp, neg); + sse_maxps(cp->func, tmp, src); + + sse_rsqrtss( cp->func, r, tmp ); /* rsqrtss(a) */ + sse_mulss( cp->func, tmp, neg_half ); /* -.5 * a */ + sse_mulss( cp->func, tmp, r ); /* -.5 * a * r */ + sse_mulss( cp->func, tmp, r ); /* -.5 * a * r * r */ + sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */ + sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */ store_scalar_dest(cp, &op->FullDstRegisters[0], r); + + aos_release_xmm_reg(cp, tmp.idx); + return TRUE; } } @@ -1877,7 +1891,7 @@ static boolean note_immediate( struct aos_compilation *cp, unsigned pos = cp->num_immediates++; unsigned j; - for (j = 0; j < imm->Immediate.Size; j++) { + for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float; } diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 82d27d4493..b3200df811 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -62,12 +62,15 @@ vs_exec_prepare( struct draw_vertex_shader *shader, { struct exec_vertex_shader *evs = exec_vertex_shader(shader); - /* specify the vertex program to interpret/execute */ - tgsi_exec_machine_bind_shader(evs->machine, - shader->state.tokens, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/ ); - + /* Specify the vertex program to interpret/execute. + * Avoid rebinding when possible. + */ + if (evs->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(evs->machine, + shader->state.tokens, + draw->vs.num_samplers, + draw->vs.samplers); + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c new file mode 100644 index 0000000000..d35db57d57 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -0,0 +1,244 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_config.h" + +#include "draw_vs.h" + +#if defined(PIPE_ARCH_PPC) + +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_ppc.h" +#include "tgsi/tgsi_ppc.h" +#include "tgsi/tgsi_parse.h" + + + +typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], + float (*outputs)[4][4], + float (*temps)[4][4], + float (*immeds)[4], + float (*consts)[4], + const float *builtins); + + +struct draw_ppc_vertex_shader { + struct draw_vertex_shader base; + struct ppc_function ppc_program; + + codegen_function func; +}; + + +static void +vs_ppc_prepare( struct draw_vertex_shader *base, + struct draw_context *draw ) +{ + /* nothing */ +} + + +/** + * Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. + */ +static void +vs_ppc_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + unsigned int i; + +#define MAX_VERTICES 4 + + /* loop over verts */ + for (i = 0; i < count; i += MAX_VERTICES) { + const uint max_vertices = MIN2(MAX_VERTICES, count - i); + float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; + float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; + float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + uint attr; + + /* convert (up to) four input verts to SoA format */ + for (attr = 0; attr < base->info.num_inputs; attr++) { + const float *vIn = (const float *) input; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { +#if 0 + if (attr==0) + printf("Input v%d a%d: %f %f %f %f\n", + vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]); +#endif + inputs_soa[attr][0][vert] = vIn[attr * 4 + 0]; + inputs_soa[attr][1][vert] = vIn[attr * 4 + 1]; + inputs_soa[attr][2][vert] = vIn[attr * 4 + 2]; + inputs_soa[attr][3][vert] = vIn[attr * 4 + 3]; + vIn += input_stride / 4; + } + } + + /* run compiled shader + */ + shader->func(inputs_soa, outputs_soa, temps_soa, + (float (*)[4]) shader->base.immediates, + (float (*)[4]) constants, + ppc_builtin_constants); + + /* convert (up to) four output verts from SoA back to AoS format */ + for (attr = 0; attr < base->info.num_outputs; attr++) { + float *vOut = (float *) output; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { + vOut[attr * 4 + 0] = outputs_soa[attr][0][vert]; + vOut[attr * 4 + 1] = outputs_soa[attr][1][vert]; + vOut[attr * 4 + 2] = outputs_soa[attr][2][vert]; + vOut[attr * 4 + 3] = outputs_soa[attr][3][vert]; +#if 0 + if (attr==0) + printf("Output v%d a%d: %f %f %f %f\n", + vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]); +#endif + vOut += output_stride / 4; + } + } + + /* advance to next group of four input/output verts */ + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); + } +} + + +static void +vs_ppc_delete( struct draw_vertex_shader *base ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + + ppc_release_func( &shader->ppc_program ); + + align_free( (void *) shader->base.immediates ); + + FREE( (void*) shader->base.state.tokens ); + FREE( shader ); +} + + +struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_ppc_vertex_shader *vs; + + vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; + + tgsi_scan_shader(templ->tokens, &vs->base.info); + + vs->base.draw = draw; +#if 0 + if (1) + vs->base.create_varient = draw_vs_varient_aos_ppc; + else +#endif + vs->base.create_varient = draw_vs_varient_generic; + vs->base.prepare = vs_ppc_prepare; + vs->base.run_linear = vs_ppc_run_linear; + vs->base.delete = vs_ppc_delete; + + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * + sizeof(float), 16); + + ppc_init_func( &vs->ppc_program ); + +#if 0 + ppc_print_code(&vs->ppc_program, TRUE); + ppc_indent(&vs->ppc_program, 8); +#endif + + if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, + &vs->ppc_program, + (float (*)[4]) vs->base.immediates, + TRUE )) + goto fail; + + vs->func = (codegen_function) ppc_get_func( &vs->ppc_program ); + if (!vs->func) { + goto fail; + } + + return &vs->base; + +fail: + /* + debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n"); + */ + + ppc_release_func( &vs->ppc_program ); + + FREE(vs); + return NULL; +} + + + +#else /* PIPE_ARCH_PPC */ + + +struct draw_vertex_shader * +draw_create_vs_ppc( struct draw_context *draw, + const struct pipe_shader_state *templ ) +{ + return (void *) 0; +} + + +#endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index c3f7bfba93..5a96d94ec3 100644 --- a/src/gallium/auxiliary/gallivm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -66,12 +66,12 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) gallivm_builtins.cpp: llvm_builtins.c clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin - (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ rm temp1.bin gallivmsoabuiltins.cpp: soabuiltins.c clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin - (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ rm temp2.bin # Emacs tags diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index 29adeea47d..f4af5cc8ad 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -53,7 +53,7 @@ #include <llvm/ModuleProvider.h> #include <llvm/Pass.h> #include <llvm/PassManager.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/PatternMatch.h> #include <llvm/ExecutionEngine/JIT.h> #include <llvm/ExecutionEngine/Interpreter.h> diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index fcc5c05794..634bac0150 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -137,4 +137,4 @@ static const unsigned char llvm_builtins_data[] = { 0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, 0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, 0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84, -0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; +0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 3a4a41e544..1bd00a0c2a 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -56,7 +56,7 @@ #include <llvm/ModuleProvider.h> #include <llvm/Pass.h> #include <llvm/PassManager.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/PatternMatch.h> #include <llvm/ExecutionEngine/JIT.h> #include <llvm/ExecutionEngine/Interpreter.h> @@ -158,8 +158,8 @@ void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); llvm::ExecutionEngine *ee = cpu->engine; assert(ee); - /*FIXME : remove */ - ee->DisableLazyCompilation(); + /*FIXME : why was this disabled ? we need it for pow/sqrt/... */ + ee->DisableLazyCompilation(false); ee->addModuleProvider(mp); llvm::Function *func = func_for_shader(prog); @@ -179,8 +179,7 @@ struct gallivm_cpu_engine * gallivm_global_cpu_engine() typedef void (*vertex_shader_runner)(void *ainputs, void *dests, - float (*aconsts)[4], - void *temps); + float (*aconsts)[4]); #define MAX_TGSI_VERTICES 4 /*! @@ -202,7 +201,6 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog, unsigned int i, j; unsigned slot; vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function); - assert(runner); for (i = 0; i < count; i += MAX_TGSI_VERTICES) { @@ -224,8 +222,7 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog, /* run shader */ runner(machine->Inputs, machine->Outputs, - (float (*)[4]) constants, - machine->Temps); + (float (*)[4]) constants); /* Unswizzle all output results */ diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h index ebf3e11cd5..d2c5852bdf 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_p.h +++ b/src/gallium/auxiliary/gallivm/gallivm_p.h @@ -101,10 +101,10 @@ static INLINE int gallivm_w_swizzle(int swizzle) return w; } -#endif /* MESA_LLVM */ - #if defined __cplusplus } #endif +#endif /* MESA_LLVM */ + #endif diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 599975d5ad..ee8162efce 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -43,7 +43,7 @@ #include <llvm/Function.h> #include <llvm/InstrTypes.h> #include <llvm/Instructions.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/MemoryBuffer.h> #include <llvm/Bitcode/ReaderWriter.h> diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index a658072551..925e948763 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -37,7 +37,7 @@ #include <llvm/Function.h> #include <llvm/Instructions.h> #include <llvm/Transforms/Utils/Cloning.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/MemoryBuffer.h> #include <llvm/Bitcode/ReaderWriter.h> @@ -90,68 +90,11 @@ llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, return res; } -std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in) -{ - std::vector<llvm::Value*> res(4); - - //Extract x's - llvm::Value *x1 = m_builder.CreateExtractElement(in[0], - m_storage->constantInt(0), - name("extractX")); - //cast it to an unsigned int - x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); - - res[0] = x1;//vectorFromVals(x1, x2, x3, x4); - //only x is valid. the others shouldn't be necessary - /* - res[1] = Constant::getNullValue(m_floatVecType); - res[2] = Constant::getNullValue(m_floatVecType); - res[3] = Constant::getNullValue(m_floatVecType); - */ - - return res; -} - - -std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<llvm::Value*> res(4); - - res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); - res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); - res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); - res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); - - return res; -} - -std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<llvm::Value*> res(4); - - res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); - res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); - res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); - res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); - - return res; -} - void InstructionsSoa::end() { m_builder.CreateRetVoid(); } -std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2, - const std::vector<llvm::Value*> in3) -{ - std::vector<llvm::Value*> res = mul(in1, in2); - return add(res, in3); -} - std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) { std::vector<llvm::Value*> res(4); @@ -171,6 +114,11 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) return res; } +llvm::IRBuilder<>* InstructionsSoa::getIRBuilder() +{ + return &m_builder; +} + void InstructionsSoa::createFunctionMap() { m_functionsMap[TGSI_OPCODE_ABS] = "abs"; @@ -258,11 +206,12 @@ llvm::Module * InstructionsSoa::currentModule() const void InstructionsSoa::createBuiltins() { + std::string ErrMsg; MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); - m_builtins = ParseBitcodeFile(buffer); - std::cout<<"Builtins created at "<<m_builtins<<std::endl; + (const char*)&soabuiltins_data[Elements(soabuiltins_data) - 1]); + m_builtins = ParseBitcodeFile(buffer, &ErrMsg); + std::cout<<"Builtins created at "<<m_builtins<<" ("<<ErrMsg<<")"<<std::endl; assert(m_builtins); createDependencies(); } @@ -274,6 +223,41 @@ std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> i return callBuiltin(func, in1); } +std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); + res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); + res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); + res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in) +{ + std::vector<llvm::Value*> res(4); + + //Extract x's + llvm::Value *x1 = m_builder.CreateExtractElement(in[0], + m_storage->constantInt(0), + name("extractX")); + //cast it to an unsigned int + x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); + + res[0] = x1;//vectorFromVals(x1, x2, x3, x4); + //only x is valid. the others shouldn't be necessary + /* + res[1] = Constant::getNullValue(m_floatVecType); + res[2] = Constant::getNullValue(m_floatVecType); + res[3] = Constant::getNullValue(m_floatVecType); + */ + + return res; +} + std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2) { @@ -281,6 +265,59 @@ std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> i return callBuiltin(func, in1, in2); } +std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_LIT); + return callBuiltin(func, in); +} + +std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3) +{ + std::vector<llvm::Value*> res = mul(in1, in2); + return add(res, in3); +} + +std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MAX); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MIN); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); + res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); + res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); + res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_POWER); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_RSQ); + return callBuiltin(func, in); +} std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2) @@ -289,6 +326,37 @@ std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> i return callBuiltin(func, in1, in2); } +std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); + res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); + res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); + res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); + + return res; +} + +void checkFunction(Function *func) +{ + for (Function::const_iterator BI = func->begin(), BE = func->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); + II != IE; ++II) { + const Instruction &I = *II; + std::cout<< "Instr = "<<I; + for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) { + const Value *Op = I.getOperand(op); + std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl; + //I->setOperand(op, V); + } + } + } +} + llvm::Value * InstructionsSoa::allocaTemp() { VectorType *vector = VectorType::get(Type::FloatTy, 4); @@ -408,46 +476,6 @@ std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std return allocaToResult(allocaPtr); } -std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_POWER); - return callBuiltin(func, in1, in2); -} - -std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_MIN); - return callBuiltin(func, in1, in2); -} - - -std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_MAX); - return callBuiltin(func, in1, in2); -} - -void checkFunction(Function *func) -{ - for (Function::const_iterator BI = func->begin(), BE = func->end(); - BI != BE; ++BI) { - const BasicBlock &BB = *BI; - for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); - II != IE; ++II) { - const Instruction &I = *II; - std::cout<< "Instr = "<<I; - for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) { - const Value *Op = I.getOperand(op); - std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl; - //I->setOperand(op, V); - } - } - } -} - void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) { assert(originalFunc); @@ -492,28 +520,4 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) } } -std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<llvm::Value*> res(4); - - res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); - res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); - res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); - res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); - - return res; -} - -std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in) -{ - llvm::Function *func = function(TGSI_OPCODE_LIT); - return callBuiltin(func, in); -} - -std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in) -{ - llvm::Function *func = function(TGSI_OPCODE_RSQ); - return callBuiltin(func, in); -} diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 3817fdc904..d6831e0a6b 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -76,6 +76,7 @@ public: void end(); std::vector<llvm::Value*> extractVector(llvm::Value *vector); + llvm::IRBuilder<>* getIRBuilder(); private: const char * name(const char *prefix) const; llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp index 6f373f6dd5..73df24c976 100644 --- a/src/gallium/auxiliary/gallivm/storage.cpp +++ b/src/gallium/auxiliary/gallivm/storage.cpp @@ -323,7 +323,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, if (indIdx) { getElem = GetElementPtrInst::Create(ptr, - BinaryOperator::create(Instruction::Add, + BinaryOperator::Create(Instruction::Add, indIdx, constantInt(idx), name("add"), diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index 78d754371f..4984ce985c 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -30,7 +30,7 @@ #include "gallivm_p.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include <llvm/BasicBlock.h> #include <llvm/Module.h> @@ -48,13 +48,11 @@ using namespace llvm; StorageSoa::StorageSoa(llvm::BasicBlock *block, llvm::Value *input, llvm::Value *output, - llvm::Value *consts, - llvm::Value *temps) + llvm::Value *consts) : m_block(block), m_input(input), m_output(output), m_consts(consts), - m_temps(temps), m_immediates(0), m_idx(0) { @@ -93,7 +91,7 @@ void StorageSoa::declareImmediates() std::vector<float> vals(4); std::vector<Constant*> channelArray; - vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0]; + vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3]; llvm::Constant *xChannel = createConstGlobalVector(vals); vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; @@ -144,22 +142,43 @@ std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx) return res; } -std::vector<llvm::Value*> StorageSoa::constElement(llvm::Value *idx) +llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc) { - std::vector<llvm::Value*> res(4); - llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; + std::vector<llvm::Value*> x(4); + x[0] = m_builder->CreateExtractElement(vector, + constantInt(cc), + name("x")); + + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + Constant *constVector = Constant::getNullValue(vectorType); + Value *res = m_builder->CreateInsertElement(constVector, x[0], + constantInt(0), + name("vecx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(1), + name("vecxx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(2), + name("vecxxx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(3), + name("vecxxxx")); + return res; +} + +std::vector<llvm::Value*> StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx) +{ + llvm::Value* res; + std::vector<llvm::Value*> res2(4); + llvm::Value *xChannel; xChannel = elementPointer(m_consts, idx, 0); - yChannel = elementPointer(m_consts, idx, 1); - zChannel = elementPointer(m_consts, idx, 2); - wChannel = elementPointer(m_consts, idx, 3); - res[0] = alignedArrayLoad(xChannel); - res[1] = alignedArrayLoad(yChannel); - res[2] = alignedArrayLoad(zChannel); - res[3] = alignedArrayLoad(wChannel); + res = alignedArrayLoad(xChannel); - return res; + res2[0]=unpackConstElement(m_builder, res,0); + res2[1]=unpackConstElement(m_builder, res,1); + res2[2]=unpackConstElement(m_builder, res,2); + res2[3]=unpackConstElement(m_builder, res,3); + + return res2; } std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx) @@ -174,14 +193,15 @@ std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx) return res; } -std::vector<llvm::Value*> StorageSoa::tempElement(llvm::Value *idx) +std::vector<llvm::Value*> StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx) { std::vector<llvm::Value*> res(4); + llvm::Value *temp = m_temps[idx]; - res[0] = element(m_temps, idx, 0); - res[1] = element(m_temps, idx, 1); - res[2] = element(m_temps, idx, 2); - res[3] = element(m_temps, idx, 3); + res[0] = element(temp, constantInt(0), 0); + res[1] = element(temp, constantInt(0), 1); + res[2] = element(temp, constantInt(0), 2); + res[3] = element(temp, constantInt(0), 3); return res; } @@ -260,6 +280,12 @@ llvm::Module * StorageSoa::currentModule() const return m_block->getParent()->getParent(); } +llvm::Constant * StorageSoa::createConstGlobalFloat(const float val) +{ + Constant*c = ConstantFP::get(APFloat(val)); + return c; +} + llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec) { VectorType *vectorType = VectorType::get(Type::FloatTy, 4); @@ -278,7 +304,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v } std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle, - llvm::Value *indIdx) + llvm::IRBuilder<>* m_builder,llvm::Value *indIdx) { std::vector<llvm::Value*> val(4); @@ -299,10 +325,10 @@ std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, in val = outputElement(realIndex); break; case TGSI_FILE_TEMPORARY: - val = tempElement(realIndex); + val = tempElement(m_builder, idx); break; case TGSI_FILE_CONSTANT: - val = constElement(realIndex); + val = constElement(m_builder, realIndex); break; case TGSI_FILE_IMMEDIATE: val = immediateElement(realIndex); @@ -328,19 +354,39 @@ std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, in return res; } +llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder) +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, "temp", + m_builder->GetInsertBlock()); + + return alloca; +} + + void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, - int mask) + int mask, llvm::IRBuilder<>* m_builder) { llvm::Value *out = 0; + llvm::Value *realIndex = 0; switch(type) { case TGSI_FILE_OUTPUT: out = m_output; + realIndex = constantInt(idx); break; case TGSI_FILE_TEMPORARY: - out = m_temps; + // if that temp doesn't already exist, alloca it + if (m_temps.find(idx) == m_temps.end()) + m_temps[idx] = allocaTemp(m_builder); + + out = m_temps[idx]; + + realIndex = constantInt(0); break; case TGSI_FILE_INPUT: out = m_input; + realIndex = constantInt(idx); break; case TGSI_FILE_ADDRESS: { llvm::Value *addr = m_addresses[idx]; @@ -358,7 +404,6 @@ void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm assert(0); break; } - llvm::Value *realIndex = constantInt(idx); if ((mask & TGSI_WRITEMASK_X)) { llvm::Value *xChannel = elementPointer(out, realIndex, 0); new StoreInst(val[0], xChannel, false, m_block); diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h index ae2fc7c6ae..56886f85e7 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.h +++ b/src/gallium/auxiliary/gallivm/storagesoa.h @@ -29,6 +29,7 @@ #define STORAGESOA_H #include <pipe/p_shader_tokens.h> +#include <llvm/Support/IRBuilder.h> #include <vector> #include <list> @@ -51,14 +52,13 @@ public: StorageSoa(llvm::BasicBlock *block, llvm::Value *input, llvm::Value *output, - llvm::Value *consts, - llvm::Value *temps); + llvm::Value *consts); std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle, - llvm::Value *indIdx =0); + llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0); void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, - int mask); + int mask, llvm::IRBuilder<>* m_builder); void addImmediate(float *vec); void declareImmediates(); @@ -76,12 +76,14 @@ private: const char *name(const char *prefix) const; llvm::Value *alignedArrayLoad(llvm::Value *val); llvm::Module *currentModule() const; + llvm::Constant *createConstGlobalFloat(const float val); llvm::Constant *createConstGlobalVector(const std::vector<float> &vec); std::vector<llvm::Value*> inputElement(llvm::Value *indIdx); - std::vector<llvm::Value*> constElement(llvm::Value *indIdx); + llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc); + std::vector<llvm::Value*> constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx); std::vector<llvm::Value*> outputElement(llvm::Value *indIdx); - std::vector<llvm::Value*> tempElement(llvm::Value *indIdx); + std::vector<llvm::Value*> tempElement(llvm::IRBuilder<>* m_builder, int idx); std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx); private: llvm::BasicBlock *m_block; @@ -89,12 +91,13 @@ private: llvm::Value *m_input; llvm::Value *m_output; llvm::Value *m_consts; - llvm::Value *m_temps; + std::map<int, llvm::Value*> m_temps; llvm::GlobalVariable *m_immediates; std::map<int, llvm::Value*> m_addresses; std::vector<std::vector<float> > m_immediatesToFlush; + llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder); mutable std::map<int, llvm::ConstantInt*> m_constInts; mutable char m_name[32]; diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 7292c0e366..5b08200d14 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -25,7 +25,7 @@ #include <llvm/ModuleProvider.h> #include <llvm/Pass.h> #include <llvm/PassManager.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/PatternMatch.h> #include <llvm/ExecutionEngine/JIT.h> #include <llvm/ExecutionEngine/Interpreter.h> @@ -52,8 +52,7 @@ static inline FunctionType *vertexShaderFunctionType() // pass are castable to the following: // [4 x <4 x float>] inputs, // [4 x <4 x float>] output, - // [4 x [4 x float]] consts, - // [4 x <4 x float>] temps + // [4 x [1 x float]] consts, std::vector<const Type*> funcArgs; VectorType *vectorType = VectorType::get(Type::FloatTy, 4); @@ -61,13 +60,12 @@ static inline FunctionType *vertexShaderFunctionType() PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0); ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4); - ArrayType *constsArray = ArrayType::get(floatArray, 4); + ArrayType *constsArray = ArrayType::get(floatArray, 1); PointerType *constsArrayPtr = PointerType::get(constsArray, 0); funcArgs.push_back(vectorArrayPtr);//inputs funcArgs.push_back(vectorArrayPtr);//output funcArgs.push_back(constsArrayPtr);//consts - funcArgs.push_back(vectorArrayPtr);//temps FunctionType *functionType = FunctionType::get( /*Result=*/Type::VoidTy, @@ -162,7 +160,7 @@ translate_immediate(Storage *storage, { float vec[4]; int i; - for (i = 0; i < imm->Immediate.Size - 1; ++i) { + for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) { switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: vec[i] = imm->u.ImmediateFloat32[i].Float; @@ -181,7 +179,7 @@ translate_immediateir(StorageSoa *storage, { float vec[4]; int i; - for (i = 0; i < imm->Immediate.Size - 1; ++i) { + for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) { switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: vec[i] = imm->u.ImmediateFloat32[i].Float; @@ -707,9 +705,8 @@ translate_instructionir(llvm::Module *module, if (src->SrcRegister.Indirect) { indIdx = storage->addrElement(src->SrcRegisterInd.Index); } - val = storage->load((enum tgsi_file_type)src->SrcRegister.File, - src->SrcRegister.Index, swizzle, indIdx); + src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx); inputs[i] = val; } @@ -1025,9 +1022,9 @@ translate_instructionir(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - storage->store((enum tgsi_file_type)dst->DstRegister.File, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + dst->DstRegister.Index, out, dst->DstRegister.WriteMask, + instr->getIRBuilder() ); } } @@ -1122,8 +1119,6 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, output->setName("outputs"); Value *consts = args++; consts->setName("consts"); - Value *temps = args++; - temps->setName("temps"); BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); @@ -1132,7 +1127,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, fi = tgsi_default_full_instruction(); fd = tgsi_default_full_declaration(); - StorageSoa storage(label_entry, input, output, consts, temps); + StorageSoa storage(label_entry, input, output, consts); InstructionsSoa instr(mod, shader, label_entry, &storage); while(!tgsi_parse_end_of_tokens(&parse)) { diff --git a/src/gallium/auxiliary/indices/Makefile b/src/gallium/auxiliary/indices/Makefile new file mode 100644 index 0000000000..8fa61d265e --- /dev/null +++ b/src/gallium/auxiliary/indices/Makefile @@ -0,0 +1,16 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = indices + +C_SOURCES = \ + u_indices_gen.c + +include ../../Makefile.template + +u_indices_gen.c: u_indices_gen.py + python $< > $@ + + +symlinks: + diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript new file mode 100644 index 0000000000..65a43a9f64 --- /dev/null +++ b/src/gallium/auxiliary/indices/SConscript @@ -0,0 +1,17 @@ +Import('*') + +env.CodeGenerate( + target = 'u_indices_gen.c', + script = 'u_indices_gen.py', + source = [], + command = 'python $SCRIPT > $TARGET' +) + +indices = env.ConvenienceLibrary( + target = 'indices', + source = [ +# 'u_indices.c', + 'u_indices_gen.c', + ]) + +auxiliaries.insert(0, indices) diff --git a/src/gallium/auxiliary/indices/u_indices.c b/src/gallium/auxiliary/indices/u_indices.c new file mode 100644 index 0000000000..0cf7d88653 --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices.c @@ -0,0 +1,253 @@ +/* + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "u_indices.h" +#include "u_indices_priv.h" + +static void translate_memcpy_ushort( const void *in, + unsigned nr, + void *out ) +{ + memcpy(out, in, nr*sizeof(short)); +} + +static void translate_memcpy_uint( const void *in, + unsigned nr, + void *out ) +{ + memcpy(out, in, nr*sizeof(int)); +} + + +int u_index_translator( unsigned hw_mask, + unsigned prim, + unsigned in_index_size, + unsigned nr, + unsigned in_pv, + unsigned out_pv, + unsigned *out_prim, + unsigned *out_index_size, + unsigned *out_nr, + u_translate_func *out_translate ) +{ + unsigned in_idx; + unsigned out_idx; + int ret = U_TRANSLATE_NORMAL; + + u_index_init(); + + in_idx = in_size_idx(in_index_size); + *out_index_size = (in_index_size == 4) ? 4 : 2; + out_idx = out_size_idx(*out_index_size); + + if ((hw_mask & (1<<prim)) && + in_index_size == *out_index_size && + in_pv == out_pv) + { + if (in_index_size == 4) + *out_translate = translate_memcpy_uint; + else + *out_translate = translate_memcpy_ushort; + + *out_prim = prim; + *out_nr = nr; + + return U_TRANSLATE_MEMCPY; + } + else { + switch (prim) { + case PIPE_PRIM_POINTS: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_POINTS; + *out_nr = nr; + break; + + case PIPE_PRIM_LINES: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = nr; + break; + + case PIPE_PRIM_LINE_STRIP: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = (nr - 1) * 2; + break; + + case PIPE_PRIM_LINE_LOOP: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = nr * 2; + break; + + case PIPE_PRIM_TRIANGLES: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = nr; + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + break; + + case PIPE_PRIM_TRIANGLE_FAN: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + break; + + case PIPE_PRIM_QUADS: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr / 4) * 6; + break; + + case PIPE_PRIM_QUAD_STRIP: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + break; + + case PIPE_PRIM_POLYGON: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + break; + + default: + assert(0); + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_POINTS; + *out_nr = nr; + return U_TRANSLATE_ERROR; + } + } + + return ret; +} + + + + + +int u_index_generator( unsigned hw_mask, + unsigned prim, + unsigned start, + unsigned nr, + unsigned in_pv, + unsigned out_pv, + unsigned *out_prim, + unsigned *out_index_size, + unsigned *out_nr, + u_generate_func *out_generate ) + +{ + unsigned out_idx; + + u_index_init(); + + *out_index_size = ((start + nr) > 0xfffe) ? 4 : 2; + out_idx = out_size_idx(*out_index_size); + + if ((hw_mask & (1<<prim)) && + (in_pv == out_pv)) { + + *out_generate = generate[out_idx][in_pv][out_pv][PIPE_PRIM_POINTS]; + *out_prim = prim; + *out_nr = nr; + return U_GENERATE_LINEAR; + } + else { + switch (prim) { + case PIPE_PRIM_POINTS: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_POINTS; + *out_nr = nr; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_LINES: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = nr; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_LINE_STRIP: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = (nr - 1) * 2; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_LINE_LOOP: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = nr * 2; + return U_GENERATE_ONE_OFF; + + case PIPE_PRIM_TRIANGLES: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = nr; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_TRIANGLE_STRIP: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_TRIANGLE_FAN: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_QUADS: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr / 4) * 6; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_QUAD_STRIP: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_POLYGON: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + return U_GENERATE_REUSABLE; + + default: + assert(0); + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_POINTS; + *out_nr = nr; + return U_TRANSLATE_ERROR; + } + } +} diff --git a/src/gallium/auxiliary/indices/u_indices.h b/src/gallium/auxiliary/indices/u_indices.h new file mode 100644 index 0000000000..abf5a3037d --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices.h @@ -0,0 +1,83 @@ +/* + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef U_INDICES_H +#define U_INDICES_H + +#include "pipe/p_compiler.h" + +#define PV_FIRST 0 +#define PV_LAST 1 +#define PV_COUNT 2 + +typedef void (*u_translate_func)( const void *in, + unsigned nr, + void *out ); + +typedef void (*u_generate_func)( unsigned nr, + void *out ); + + +/* Return codes describe the translate/generate operation. Caller may + * be able to reuse translated indices under some circumstances. + */ +#define U_TRANSLATE_ERROR -1 +#define U_TRANSLATE_NORMAL 1 +#define U_TRANSLATE_MEMCPY 2 +#define U_GENERATE_LINEAR 3 +#define U_GENERATE_REUSABLE 4 +#define U_GENERATE_ONE_OFF 5 + + +void u_index_init( void ); + +int u_index_translator( unsigned hw_mask, + unsigned prim, + unsigned in_index_size, + unsigned nr, + unsigned in_pv, /* API */ + unsigned out_pv, /* hardware */ + unsigned *out_prim, + unsigned *out_index_size, + unsigned *out_nr, + u_translate_func *out_translate ); + +/* Note that even when generating it is necessary to know what the + * API's PV is, as the indices generated will depend on whether it is + * the same as hardware or not, and in the case of triangle strips, + * whether it is first or last. + */ +int u_index_generator( unsigned hw_mask, + unsigned prim, + unsigned start, + unsigned nr, + unsigned in_pv, /* API */ + unsigned out_pv, /* hardware */ + unsigned *out_prim, + unsigned *out_index_size, + unsigned *out_nr, + u_generate_func *out_generate ); + + +#endif diff --git a/src/gallium/auxiliary/indices/u_indices_gen.c b/src/gallium/auxiliary/indices/u_indices_gen.c new file mode 100644 index 0000000000..3c981e5d7f --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices_gen.c @@ -0,0 +1,5129 @@ +/* File automatically generated by indices.py */ + +/* + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + + +/** + * @file + * Functions to translate and generate index lists + */ + +#include "indices/u_indices.h" +#include "indices/u_indices_priv.h" +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +static unsigned out_size_idx( unsigned index_size ) +{ + switch (index_size) { + case 4: return OUT_UINT; + case 2: return OUT_USHORT; + default: assert(0); return OUT_USHORT; + } +} + +static unsigned in_size_idx( unsigned index_size ) +{ + switch (index_size) { + case 4: return IN_UINT; + case 2: return IN_USHORT; + case 1: return IN_UBYTE; + default: assert(0); return IN_UBYTE; + } +} + + +static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; +static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; + + + +static void generate_points_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)(i); + } +} +static void generate_lines_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)(i); + (out+i)[1] = (ushort)(i+1); + } +} +static void generate_linestrip_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1); + } +} +static void generate_lineloop_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1); + } + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(0); +} +static void generate_tris_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)(i); + (out+i)[1] = (ushort)(i+1); + (out+i)[2] = (ushort)(i+2); + } +} +static void generate_tristrip_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1+(i&1)); + (out+j)[2] = (ushort)(i+2-(i&1)); + } +} +static void generate_trifan_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i+1); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_quads_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)(i+0); + (out+j+0)[1] = (ushort)(i+1); + (out+j+0)[2] = (ushort)(i+3); + (out+j+3)[0] = (ushort)(i+1); + (out+j+3)[1] = (ushort)(i+2); + (out+j+3)[2] = (ushort)(i+3); + } +} +static void generate_quadstrip_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)(i+2); + (out+j+0)[1] = (ushort)(i+0); + (out+j+0)[2] = (ushort)(i+3); + (out+j+3)[0] = (ushort)(i+0); + (out+j+3)[1] = (ushort)(i+1); + (out+j+3)[2] = (ushort)(i+3); + } +} +static void generate_polygon_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i+1); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_points_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)(i); + } +} +static void generate_lines_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)(i+1); + (out+i)[1] = (ushort)(i); + } +} +static void generate_linestrip_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i); + } +} +static void generate_lineloop_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i); + } + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i); +} +static void generate_tris_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)(i+1); + (out+i)[1] = (ushort)(i+2); + (out+i)[2] = (ushort)(i); + } +} +static void generate_tristrip_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+1+(i&1)); + (out+j)[1] = (ushort)(i+2-(i&1)); + (out+j)[2] = (ushort)(i); + } +} +static void generate_trifan_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i+2); + (out+j)[2] = (ushort)(0); + } +} +static void generate_quads_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)(i+1); + (out+j+0)[1] = (ushort)(i+3); + (out+j+0)[2] = (ushort)(i+0); + (out+j+3)[0] = (ushort)(i+2); + (out+j+3)[1] = (ushort)(i+3); + (out+j+3)[2] = (ushort)(i+1); + } +} +static void generate_quadstrip_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)(i+0); + (out+j+0)[1] = (ushort)(i+3); + (out+j+0)[2] = (ushort)(i+2); + (out+j+3)[0] = (ushort)(i+1); + (out+j+3)[1] = (ushort)(i+3); + (out+j+3)[2] = (ushort)(i+0); + } +} +static void generate_polygon_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i+2); + (out+j)[2] = (ushort)(0); + } +} +static void generate_points_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)(i); + } +} +static void generate_lines_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)(i+1); + (out+i)[1] = (ushort)(i); + } +} +static void generate_linestrip_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i); + } +} +static void generate_lineloop_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i); + } + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i); +} +static void generate_tris_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)(i+2); + (out+i)[1] = (ushort)(i); + (out+i)[2] = (ushort)(i+1); + } +} +static void generate_tristrip_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+2); + (out+j)[1] = (ushort)(i+(i&1)); + (out+j)[2] = (ushort)(i+1-(i&1)); + } +} +static void generate_trifan_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+2); + (out+j)[1] = (ushort)(0); + (out+j)[2] = (ushort)(i+1); + } +} +static void generate_quads_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)(i+3); + (out+j+0)[1] = (ushort)(i+0); + (out+j+0)[2] = (ushort)(i+1); + (out+j+3)[0] = (ushort)(i+3); + (out+j+3)[1] = (ushort)(i+1); + (out+j+3)[2] = (ushort)(i+2); + } +} +static void generate_quadstrip_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)(i+3); + (out+j+0)[1] = (ushort)(i+2); + (out+j+0)[2] = (ushort)(i+0); + (out+j+3)[0] = (ushort)(i+3); + (out+j+3)[1] = (ushort)(i+0); + (out+j+3)[2] = (ushort)(i+1); + } +} +static void generate_polygon_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i+1); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_points_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)(i); + } +} +static void generate_lines_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)(i); + (out+i)[1] = (ushort)(i+1); + } +} +static void generate_linestrip_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1); + } +} +static void generate_lineloop_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1); + } + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(0); +} +static void generate_tris_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)(i); + (out+i)[1] = (ushort)(i+1); + (out+i)[2] = (ushort)(i+2); + } +} +static void generate_tristrip_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+(i&1)); + (out+j)[1] = (ushort)(i+1-(i&1)); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_trifan_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i+1); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_quads_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)(i+0); + (out+j+0)[1] = (ushort)(i+1); + (out+j+0)[2] = (ushort)(i+3); + (out+j+3)[0] = (ushort)(i+1); + (out+j+3)[1] = (ushort)(i+2); + (out+j+3)[2] = (ushort)(i+3); + } +} +static void generate_quadstrip_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)(i+2); + (out+j+0)[1] = (ushort)(i+0); + (out+j+0)[2] = (ushort)(i+3); + (out+j+3)[0] = (ushort)(i+0); + (out+j+3)[1] = (ushort)(i+1); + (out+j+3)[2] = (ushort)(i+3); + } +} +static void generate_polygon_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i+2); + (out+j)[2] = (ushort)(0); + } +} +static void generate_points_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)(i); + } +} +static void generate_lines_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)(i); + (out+i)[1] = (uint)(i+1); + } +} +static void generate_linestrip_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1); + } +} +static void generate_lineloop_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1); + } + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(0); +} +static void generate_tris_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)(i); + (out+i)[1] = (uint)(i+1); + (out+i)[2] = (uint)(i+2); + } +} +static void generate_tristrip_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1+(i&1)); + (out+j)[2] = (uint)(i+2-(i&1)); + } +} +static void generate_trifan_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i+1); + (out+j)[2] = (uint)(i+2); + } +} +static void generate_quads_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)(i+0); + (out+j+0)[1] = (uint)(i+1); + (out+j+0)[2] = (uint)(i+3); + (out+j+3)[0] = (uint)(i+1); + (out+j+3)[1] = (uint)(i+2); + (out+j+3)[2] = (uint)(i+3); + } +} +static void generate_quadstrip_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)(i+2); + (out+j+0)[1] = (uint)(i+0); + (out+j+0)[2] = (uint)(i+3); + (out+j+3)[0] = (uint)(i+0); + (out+j+3)[1] = (uint)(i+1); + (out+j+3)[2] = (uint)(i+3); + } +} +static void generate_polygon_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i+1); + (out+j)[2] = (uint)(i+2); + } +} +static void generate_points_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)(i); + } +} +static void generate_lines_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)(i+1); + (out+i)[1] = (uint)(i); + } +} +static void generate_linestrip_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i); + } +} +static void generate_lineloop_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i); + } + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i); +} +static void generate_tris_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)(i+1); + (out+i)[1] = (uint)(i+2); + (out+i)[2] = (uint)(i); + } +} +static void generate_tristrip_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+1+(i&1)); + (out+j)[1] = (uint)(i+2-(i&1)); + (out+j)[2] = (uint)(i); + } +} +static void generate_trifan_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i+2); + (out+j)[2] = (uint)(0); + } +} +static void generate_quads_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)(i+1); + (out+j+0)[1] = (uint)(i+3); + (out+j+0)[2] = (uint)(i+0); + (out+j+3)[0] = (uint)(i+2); + (out+j+3)[1] = (uint)(i+3); + (out+j+3)[2] = (uint)(i+1); + } +} +static void generate_quadstrip_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)(i+0); + (out+j+0)[1] = (uint)(i+3); + (out+j+0)[2] = (uint)(i+2); + (out+j+3)[0] = (uint)(i+1); + (out+j+3)[1] = (uint)(i+3); + (out+j+3)[2] = (uint)(i+0); + } +} +static void generate_polygon_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i+2); + (out+j)[2] = (uint)(0); + } +} +static void generate_points_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)(i); + } +} +static void generate_lines_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)(i+1); + (out+i)[1] = (uint)(i); + } +} +static void generate_linestrip_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i); + } +} +static void generate_lineloop_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i); + } + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i); +} +static void generate_tris_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)(i+2); + (out+i)[1] = (uint)(i); + (out+i)[2] = (uint)(i+1); + } +} +static void generate_tristrip_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+2); + (out+j)[1] = (uint)(i+(i&1)); + (out+j)[2] = (uint)(i+1-(i&1)); + } +} +static void generate_trifan_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+2); + (out+j)[1] = (uint)(0); + (out+j)[2] = (uint)(i+1); + } +} +static void generate_quads_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)(i+3); + (out+j+0)[1] = (uint)(i+0); + (out+j+0)[2] = (uint)(i+1); + (out+j+3)[0] = (uint)(i+3); + (out+j+3)[1] = (uint)(i+1); + (out+j+3)[2] = (uint)(i+2); + } +} +static void generate_quadstrip_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)(i+3); + (out+j+0)[1] = (uint)(i+2); + (out+j+0)[2] = (uint)(i+0); + (out+j+3)[0] = (uint)(i+3); + (out+j+3)[1] = (uint)(i+0); + (out+j+3)[2] = (uint)(i+1); + } +} +static void generate_polygon_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i+1); + (out+j)[2] = (uint)(i+2); + } +} +static void generate_points_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)(i); + } +} +static void generate_lines_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)(i); + (out+i)[1] = (uint)(i+1); + } +} +static void generate_linestrip_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1); + } +} +static void generate_lineloop_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1); + } + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(0); +} +static void generate_tris_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)(i); + (out+i)[1] = (uint)(i+1); + (out+i)[2] = (uint)(i+2); + } +} +static void generate_tristrip_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+(i&1)); + (out+j)[1] = (uint)(i+1-(i&1)); + (out+j)[2] = (uint)(i+2); + } +} +static void generate_trifan_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i+1); + (out+j)[2] = (uint)(i+2); + } +} +static void generate_quads_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)(i+0); + (out+j+0)[1] = (uint)(i+1); + (out+j+0)[2] = (uint)(i+3); + (out+j+3)[0] = (uint)(i+1); + (out+j+3)[1] = (uint)(i+2); + (out+j+3)[2] = (uint)(i+3); + } +} +static void generate_quadstrip_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)(i+2); + (out+j+0)[1] = (uint)(i+0); + (out+j+0)[2] = (uint)(i+3); + (out+j+3)[0] = (uint)(i+0); + (out+j+3)[1] = (uint)(i+1); + (out+j+3)[2] = (uint)(i+3); + } +} +static void generate_polygon_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i+2); + (out+j)[2] = (uint)(0); + } +} +static void translate_points_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1+(i&1)]; + (out+j)[2] = (ushort)in[i+2-(i&1)]; + } +} +static void translate_trifan_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i+2]; + (out+i)[2] = (ushort)in[i]; + } +} +static void translate_tristrip_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1+(i&1)]; + (out+j)[1] = (ushort)in[i+2-(i&1)]; + (out+j)[2] = (ushort)in[i]; + } +} +static void translate_trifan_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_quads_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+1]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+2]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_quadstrip_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+0]; + } +} +static void translate_polygon_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+2]; + (out+i)[1] = (ushort)in[i]; + (out+i)[2] = (ushort)in[i+1]; + } +} +static void translate_tristrip_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[i+(i&1)]; + (out+j)[2] = (ushort)in[i+1-(i&1)]; + } +} +static void translate_trifan_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[0]; + (out+j)[2] = (ushort)in[i+1]; + } +} +static void translate_quads_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+1]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+2]; + } +} +static void translate_quadstrip_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+0]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_polygon_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+(i&1)]; + (out+j)[1] = (ushort)in[i+1-(i&1)]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_trifan_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1+(i&1)]; + (out+j)[2] = (uint)in[i+2-(i&1)]; + } +} +static void translate_trifan_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i+2]; + (out+i)[2] = (uint)in[i]; + } +} +static void translate_tristrip_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1+(i&1)]; + (out+j)[1] = (uint)in[i+2-(i&1)]; + (out+j)[2] = (uint)in[i]; + } +} +static void translate_trifan_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_quads_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+1]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+2]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_quadstrip_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+0]; + } +} +static void translate_polygon_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+2]; + (out+i)[1] = (uint)in[i]; + (out+i)[2] = (uint)in[i+1]; + } +} +static void translate_tristrip_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[i+(i&1)]; + (out+j)[2] = (uint)in[i+1-(i&1)]; + } +} +static void translate_trifan_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[0]; + (out+j)[2] = (uint)in[i+1]; + } +} +static void translate_quads_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+1]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+2]; + } +} +static void translate_quadstrip_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+0]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_polygon_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+(i&1)]; + (out+j)[1] = (uint)in[i+1-(i&1)]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_trifan_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1+(i&1)]; + (out+j)[2] = (ushort)in[i+2-(i&1)]; + } +} +static void translate_trifan_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i+2]; + (out+i)[2] = (ushort)in[i]; + } +} +static void translate_tristrip_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1+(i&1)]; + (out+j)[1] = (ushort)in[i+2-(i&1)]; + (out+j)[2] = (ushort)in[i]; + } +} +static void translate_trifan_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_quads_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+1]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+2]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_quadstrip_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+0]; + } +} +static void translate_polygon_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+2]; + (out+i)[1] = (ushort)in[i]; + (out+i)[2] = (ushort)in[i+1]; + } +} +static void translate_tristrip_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[i+(i&1)]; + (out+j)[2] = (ushort)in[i+1-(i&1)]; + } +} +static void translate_trifan_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[0]; + (out+j)[2] = (ushort)in[i+1]; + } +} +static void translate_quads_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+1]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+2]; + } +} +static void translate_quadstrip_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+0]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_polygon_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+(i&1)]; + (out+j)[1] = (ushort)in[i+1-(i&1)]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_trifan_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1+(i&1)]; + (out+j)[2] = (uint)in[i+2-(i&1)]; + } +} +static void translate_trifan_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i+2]; + (out+i)[2] = (uint)in[i]; + } +} +static void translate_tristrip_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1+(i&1)]; + (out+j)[1] = (uint)in[i+2-(i&1)]; + (out+j)[2] = (uint)in[i]; + } +} +static void translate_trifan_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_quads_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+1]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+2]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_quadstrip_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+0]; + } +} +static void translate_polygon_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+2]; + (out+i)[1] = (uint)in[i]; + (out+i)[2] = (uint)in[i+1]; + } +} +static void translate_tristrip_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[i+(i&1)]; + (out+j)[2] = (uint)in[i+1-(i&1)]; + } +} +static void translate_trifan_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[0]; + (out+j)[2] = (uint)in[i+1]; + } +} +static void translate_quads_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+1]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+2]; + } +} +static void translate_quadstrip_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+0]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_polygon_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+(i&1)]; + (out+j)[1] = (uint)in[i+1-(i&1)]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_trifan_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1+(i&1)]; + (out+j)[2] = (ushort)in[i+2-(i&1)]; + } +} +static void translate_trifan_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i+2]; + (out+i)[2] = (ushort)in[i]; + } +} +static void translate_tristrip_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1+(i&1)]; + (out+j)[1] = (ushort)in[i+2-(i&1)]; + (out+j)[2] = (ushort)in[i]; + } +} +static void translate_trifan_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_quads_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+1]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+2]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_quadstrip_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+0]; + } +} +static void translate_polygon_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+2]; + (out+i)[1] = (ushort)in[i]; + (out+i)[2] = (ushort)in[i+1]; + } +} +static void translate_tristrip_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[i+(i&1)]; + (out+j)[2] = (ushort)in[i+1-(i&1)]; + } +} +static void translate_trifan_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[0]; + (out+j)[2] = (ushort)in[i+1]; + } +} +static void translate_quads_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+1]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+2]; + } +} +static void translate_quadstrip_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+0]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_polygon_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+(i&1)]; + (out+j)[1] = (ushort)in[i+1-(i&1)]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_trifan_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1+(i&1)]; + (out+j)[2] = (uint)in[i+2-(i&1)]; + } +} +static void translate_trifan_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i+2]; + (out+i)[2] = (uint)in[i]; + } +} +static void translate_tristrip_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1+(i&1)]; + (out+j)[1] = (uint)in[i+2-(i&1)]; + (out+j)[2] = (uint)in[i]; + } +} +static void translate_trifan_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_quads_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+1]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+2]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_quadstrip_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+0]; + } +} +static void translate_polygon_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+2]; + (out+i)[1] = (uint)in[i]; + (out+i)[2] = (uint)in[i+1]; + } +} +static void translate_tristrip_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[i+(i&1)]; + (out+j)[2] = (uint)in[i+1-(i&1)]; + } +} +static void translate_trifan_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[0]; + (out+j)[2] = (uint)in[i+1]; + } +} +static void translate_quads_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+1]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+2]; + } +} +static void translate_quadstrip_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+0]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_polygon_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+(i&1)]; + (out+j)[1] = (uint)in[i+1-(i&1)]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_trifan_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +void u_index_init( void ) +{ + static int firsttime = 1; + if (!firsttime) return; + firsttime = 0; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2last; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2last; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2last; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2last; +} +#include "indices/u_indices.c" diff --git a/src/gallium/auxiliary/indices/u_indices_gen.py b/src/gallium/auxiliary/indices/u_indices_gen.py new file mode 100644 index 0000000000..af63d09930 --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices_gen.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +copyright = ''' +/* + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +''' + +GENERATE, UBYTE, USHORT, UINT = 'generate', 'ubyte', 'ushort', 'uint' +FIRST, LAST = 'first', 'last' + +INTYPES = (GENERATE, UBYTE, USHORT, UINT) +OUTTYPES = (USHORT, UINT) +PVS=(FIRST, LAST) +PRIMS=('points', + 'lines', + 'linestrip', + 'lineloop', + 'tris', + 'trifan', + 'tristrip', + 'quads', + 'quadstrip', + 'polygon') + +LONGPRIMS=('PIPE_PRIM_POINTS', + 'PIPE_PRIM_LINES', + 'PIPE_PRIM_LINE_STRIP', + 'PIPE_PRIM_LINE_LOOP', + 'PIPE_PRIM_TRIANGLES', + 'PIPE_PRIM_TRIANGLE_FAN', + 'PIPE_PRIM_TRIANGLE_STRIP', + 'PIPE_PRIM_QUADS', + 'PIPE_PRIM_QUAD_STRIP', + 'PIPE_PRIM_POLYGON') + +longprim = dict(zip(PRIMS, LONGPRIMS)) +intype_idx = dict(ubyte='IN_UBYTE', ushort='IN_USHORT', uint='IN_UINT') +outtype_idx = dict(ushort='OUT_USHORT', uint='OUT_UINT') +pv_idx = dict(first='PV_FIRST', last='PV_LAST') + + +def prolog(): + print '''/* File automatically generated by indices.py */''' + print copyright + print r''' + +/** + * @file + * Functions to translate and generate index lists + */ + +#include "indices/u_indices.h" +#include "indices/u_indices_priv.h" +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +static unsigned out_size_idx( unsigned index_size ) +{ + switch (index_size) { + case 4: return OUT_UINT; + case 2: return OUT_USHORT; + default: assert(0); return OUT_USHORT; + } +} + +static unsigned in_size_idx( unsigned index_size ) +{ + switch (index_size) { + case 4: return IN_UINT; + case 2: return IN_USHORT; + case 1: return IN_UBYTE; + default: assert(0); return IN_UBYTE; + } +} + + +static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; +static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; + + +''' + +def vert( intype, outtype, v0 ): + if intype == GENERATE: + return '(' + outtype + ')(' + v0 + ')' + else: + return '(' + outtype + ')in[' + v0 + ']' + +def point( intype, outtype, ptr, v0 ): + print ' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';' + +def line( intype, outtype, ptr, v0, v1 ): + print ' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';' + print ' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';' + +def tri( intype, outtype, ptr, v0, v1, v2 ): + print ' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';' + print ' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';' + print ' (' + ptr + ')[2] = ' + vert( intype, outtype, v2 ) + ';' + +def do_point( intype, outtype, ptr, v0 ): + point( intype, outtype, ptr, v0 ) + +def do_line( intype, outtype, ptr, v0, v1, inpv, outpv ): + if inpv == outpv: + line( intype, outtype, ptr, v0, v1 ) + else: + line( intype, outtype, ptr, v1, v0 ) + +def do_tri( intype, outtype, ptr, v0, v1, v2, inpv, outpv ): + if inpv == outpv: + tri( intype, outtype, ptr, v0, v1, v2 ) + else: + if inpv == FIRST: + tri( intype, outtype, ptr, v1, v2, v0 ) + else: + tri( intype, outtype, ptr, v2, v0, v1 ) + +def do_quad( intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ): + do_tri( intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv ); + do_tri( intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv ); + +def name(intype, outtype, inpv, outpv, prim): + if intype == GENERATE: + return 'generate_' + prim + '_' + outtype + '_' + inpv + '2' + outpv + else: + return 'translate_' + prim + '_' + intype + '2' + outtype + '_' + inpv + '2' + outpv + +def preamble(intype, outtype, inpv, outpv, prim): + print 'static void ' + name( intype, outtype, inpv, outpv, prim ) + '(' + if intype != GENERATE: + print ' const void * _in,' + print ' unsigned nr,' + print ' void *_out )' + print '{' + if intype != GENERATE: + print ' const ' + intype + '*in = (const ' + intype + '*)_in;' + print ' ' + outtype + ' *out = (' + outtype + '*)_out;' + print ' unsigned i, j;' + print ' (void)j;' + +def postamble(): + print '}' + + +def points(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='points') + print ' for (i = 0; i < nr; i++) { ' + do_point( intype, outtype, 'out+i', 'i' ); + print ' }' + postamble() + +def lines(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='lines') + print ' for (i = 0; i < nr; i+=2) { ' + do_line( intype, outtype, 'out+i', 'i', 'i+1', inpv, outpv ); + print ' }' + postamble() + +def linestrip(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='linestrip') + print ' for (j = i = 0; j < nr; j+=2, i++) { ' + do_line( intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv ); + print ' }' + postamble() + +def lineloop(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='lineloop') + print ' for (j = i = 0; j < nr - 2; j+=2, i++) { ' + do_line( intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv ); + print ' }' + do_line( intype, outtype, 'out+j', 'i', '0', inpv, outpv ); + postamble() + +def tris(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='tris') + print ' for (i = 0; i < nr; i+=3) { ' + do_tri( intype, outtype, 'out+i', 'i', 'i+1', 'i+2', inpv, outpv ); + print ' }' + postamble() + + +def tristrip(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='tristrip') + print ' for (j = i = 0; j < nr; j+=3, i++) { ' + if inpv == FIRST: + do_tri( intype, outtype, 'out+j', 'i', 'i+1+(i&1)', 'i+2-(i&1)', inpv, outpv ); + else: + do_tri( intype, outtype, 'out+j', 'i+(i&1)', 'i+1-(i&1)', 'i+2', inpv, outpv ); + print ' }' + postamble() + + +def trifan(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='trifan') + print ' for (j = i = 0; j < nr; j+=3, i++) { ' + do_tri( intype, outtype, 'out+j', '0', 'i+1', 'i+2', inpv, outpv ); + print ' }' + postamble() + + + +def polygon(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='polygon') + print ' for (j = i = 0; j < nr; j+=3, i++) { ' + if inpv == FIRST: + do_tri( intype, outtype, 'out+j', '0', 'i+1', 'i+2', inpv, outpv ); + else: + do_tri( intype, outtype, 'out+j', 'i+1', 'i+2', '0', inpv, outpv ); + print ' }' + postamble() + + +def quads(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='quads') + print ' for (j = i = 0; j < nr; j+=6, i+=4) { ' + do_quad( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ); + print ' }' + postamble() + + +def quadstrip(intype, outtype, inpv, outpv): + preamble(intype, outtype, inpv, outpv, prim='quadstrip') + print ' for (j = i = 0; j < nr; j+=6, i+=2) { ' + do_quad( intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv ); + print ' }' + postamble() + + +def emit_funcs(): + for intype in INTYPES: + for outtype in OUTTYPES: + for inpv in (FIRST, LAST): + for outpv in (FIRST, LAST): + points(intype, outtype, inpv, outpv) + lines(intype, outtype, inpv, outpv) + linestrip(intype, outtype, inpv, outpv) + lineloop(intype, outtype, inpv, outpv) + tris(intype, outtype, inpv, outpv) + tristrip(intype, outtype, inpv, outpv) + trifan(intype, outtype, inpv, outpv) + quads(intype, outtype, inpv, outpv) + quadstrip(intype, outtype, inpv, outpv) + polygon(intype, outtype, inpv, outpv) + +def init(intype, outtype, inpv, outpv, prim): + if intype == GENERATE: + print ('generate[' + + outtype_idx[outtype] + + '][' + pv_idx[inpv] + + '][' + pv_idx[outpv] + + '][' + longprim[prim] + + '] = ' + name( intype, outtype, inpv, outpv, prim ) + ';') + else: + print ('translate[' + + intype_idx[intype] + + '][' + outtype_idx[outtype] + + '][' + pv_idx[inpv] + + '][' + pv_idx[outpv] + + '][' + longprim[prim] + + '] = ' + name( intype, outtype, inpv, outpv, prim ) + ';') + + +def emit_all_inits(): + for intype in INTYPES: + for outtype in OUTTYPES: + for inpv in PVS: + for outpv in PVS: + for prim in PRIMS: + init(intype, outtype, inpv, outpv, prim) + +def emit_init(): + print 'void u_index_init( void )' + print '{' + print ' static int firsttime = 1;' + print ' if (!firsttime) return;' + print ' firsttime = 0;' + emit_all_inits() + print '}' + + + + +def epilog(): + print '#include "indices/u_indices.c"' + + +def main(): + prolog() + emit_funcs() + emit_init() + epilog() + + +if __name__ == '__main__': + main() diff --git a/src/gallium/auxiliary/indices/u_indices_priv.h b/src/gallium/auxiliary/indices/u_indices_priv.h new file mode 100644 index 0000000000..9c3298c24d --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices_priv.h @@ -0,0 +1,43 @@ +/* + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef U_INDICES_PRIV_H +#define U_INDICES_PRIV_H + +#include "pipe/p_compiler.h" +#include "u_indices.h" + +#define IN_UBYTE 0 +#define IN_USHORT 1 +#define IN_UINT 2 +#define IN_COUNT 3 + +#define OUT_USHORT 0 +#define OUT_UINT 1 +#define OUT_COUNT 2 + + +#define PRIM_COUNT (PIPE_PRIM_POLYGON + 1) + +#endif diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index f9b39d9ce0..3b501c51df 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -11,10 +11,10 @@ C_SOURCES = \ pb_bufmgr_debug.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ + pb_bufmgr_ondemand.c \ pb_bufmgr_pool.c \ pb_bufmgr_slab.c \ - pb_validate.c \ - pb_winsys.c + pb_validate.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 56a40dda0d..8e9f06abe4 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -10,10 +10,10 @@ pipebuffer = env.ConvenienceLibrary( 'pb_bufmgr_debug.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', + 'pb_bufmgr_ondemand.c', 'pb_bufmgr_pool.c', 'pb_bufmgr_slab.c', 'pb_validate.c', - 'pb_winsys.c', ]) auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 8505d333bd..e6b0b30ff4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -37,7 +37,7 @@ * There is no obligation of a winsys driver to use this library. And a pipe * driver should be completly agnostic about it. * - * \author Jos� Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #ifndef PB_BUFFER_H_ @@ -45,7 +45,8 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" +#include "pipe/p_error.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" @@ -56,6 +57,8 @@ extern "C" { struct pb_vtbl; +struct pb_validate; + /** * Buffer description. @@ -104,6 +107,13 @@ struct pb_vtbl void (*unmap)( struct pb_buffer *buf ); + enum pipe_error (*validate)( struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags ); + + void (*fence)( struct pb_buffer *buf, + struct pipe_fence_handle *fence ); + /** * Get the base buffer and the offset. * @@ -118,6 +128,7 @@ struct pb_vtbl void (*get_base_buffer)( struct pb_buffer *buf, struct pb_buffer **base_buf, unsigned *offset ); + }; @@ -148,6 +159,7 @@ pb_map(struct pb_buffer *buf, assert(buf); if(!buf) return NULL; + assert(buf->base.refcount > 0); return buf->vtbl->map(buf, flags); } @@ -158,6 +170,7 @@ pb_unmap(struct pb_buffer *buf) assert(buf); if(!buf) return; + assert(buf->base.refcount > 0); buf->vtbl->unmap(buf); } @@ -173,7 +186,33 @@ pb_get_base_buffer( struct pb_buffer *buf, offset = 0; return; } + assert(buf->base.refcount > 0); + assert(buf->vtbl->get_base_buffer); buf->vtbl->get_base_buffer(buf, base_buf, offset); + assert(*base_buf); + assert(*offset < (*base_buf)->base.size); +} + + +static INLINE enum pipe_error +pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags) +{ + assert(buf); + if(!buf) + return PIPE_ERROR; + assert(buf->vtbl->validate); + return buf->vtbl->validate(buf, vl, flags); +} + + +static INLINE void +pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) +{ + assert(buf); + if(!buf) + return; + assert(buf->vtbl->fence); + buf->vtbl->fence(buf, fence); } @@ -183,6 +222,7 @@ pb_destroy(struct pb_buffer *buf) assert(buf); if(!buf) return; + assert(buf->base.refcount == 0); buf->vtbl->destroy(buf); } @@ -193,11 +233,16 @@ static INLINE void pb_reference(struct pb_buffer **dst, struct pb_buffer *src) { - if (src) + if (src) { + assert(src->base.refcount); src->base.refcount++; + } - if (*dst && --(*dst)->base.refcount == 0) - pb_destroy( *dst ); + if (*dst) { + assert((*dst)->base.refcount); + if(--(*dst)->base.refcount == 0) + pb_destroy( *dst ); + } *dst = src; } @@ -210,7 +255,13 @@ pb_reference(struct pb_buffer **dst, static INLINE boolean pb_check_alignment(size_t requested, size_t provided) { - return requested <= provided && (provided % requested) == 0 ? TRUE : FALSE; + if(!requested) + return TRUE; + if(requested > provided) + return FALSE; + if(provided % requested != 0) + return FALSE; + return TRUE; } @@ -234,10 +285,6 @@ pb_malloc_buffer_create(size_t size, const struct pb_desc *desc); -void -pb_init_winsys(struct pipe_winsys *winsys); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 17b2781052..0cddc95aa6 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -29,7 +29,7 @@ * \file * Implementation of fenced buffers. * - * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com> + * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> */ @@ -43,8 +43,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" -#include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" @@ -59,19 +58,12 @@ */ #define SUPER(__derived) (&(__derived)->base) -#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \ - ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE ) -#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \ - ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ) -#define PIPE_BUFFER_USAGE_WRITE \ - ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE ) - struct fenced_buffer_list { pipe_mutex mutex; - struct pipe_winsys *winsys; + struct pb_fence_ops *ops; size_t numDelayed; @@ -97,6 +89,8 @@ struct fenced_buffer unsigned flags; unsigned mapcount; + struct pb_validate *vl; + unsigned validation_flags; struct pipe_fence_handle *fence; struct list_head head; @@ -108,7 +102,6 @@ static INLINE struct fenced_buffer * fenced_buffer(struct pb_buffer *buf) { assert(buf); - assert(buf->vtbl == &fenced_buffer_vtbl); return (struct fenced_buffer *)buf; } @@ -146,12 +139,12 @@ static INLINE void _fenced_buffer_remove(struct fenced_buffer_list *fenced_list, struct fenced_buffer *fenced_buf) { - struct pipe_winsys *winsys = fenced_list->winsys; + struct pb_fence_ops *ops = fenced_list->ops; assert(fenced_buf->fence); assert(fenced_buf->list == fenced_list); - winsys->fence_reference(winsys, &fenced_buf->fence, NULL); + ops->fence_reference(ops, &fenced_buf->fence, NULL); fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; assert(fenced_buf->head.prev); @@ -174,7 +167,7 @@ static INLINE enum pipe_error _fenced_buffer_finish(struct fenced_buffer *fenced_buf) { struct fenced_buffer_list *fenced_list = fenced_buf->list; - struct pipe_winsys *winsys = fenced_list->winsys; + struct pb_fence_ops *ops = fenced_list->ops; #if 0 debug_warning("waiting for GPU"); @@ -182,7 +175,7 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf) assert(fenced_buf->fence); if(fenced_buf->fence) { - if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) { + if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) { return PIPE_ERROR; } /* Remove from the fenced list */ @@ -202,7 +195,7 @@ static void _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) { - struct pipe_winsys *winsys = fenced_list->winsys; + struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; struct fenced_buffer *fenced_buf; struct pipe_fence_handle *prev_fence = NULL; @@ -215,15 +208,15 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, if(fenced_buf->fence != prev_fence) { int signaled; if (wait) - signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0); + signaled = ops->fence_finish(ops, fenced_buf->fence, 0); else - signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0); + signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); if (signaled != 0) break; prev_fence = fenced_buf->fence; } else { - assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); + assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } _fenced_buffer_remove(fenced_list, fenced_buf); @@ -243,14 +236,14 @@ fenced_buffer_destroy(struct pb_buffer *buf) pipe_mutex_lock(fenced_list->mutex); assert(fenced_buf->base.base.refcount == 0); if (fenced_buf->fence) { - struct pipe_winsys *winsys = fenced_list->winsys; - if(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0) { + struct pb_fence_ops *ops = fenced_list->ops; + if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { struct list_head *curr, *prev; curr = &fenced_buf->head; prev = curr->prev; do { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); + assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); _fenced_buffer_remove(fenced_list, fenced_buf); curr = prev; prev = curr->prev; @@ -274,6 +267,7 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); void *map; + assert(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE); assert(!(flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE)); flags &= PIPE_BUFFER_USAGE_CPU_READ_WRITE; @@ -315,6 +309,93 @@ fenced_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +fenced_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + enum pipe_error ret; + + if(!vl) { + /* invalidate */ + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + return PIPE_OK; + } + + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + + /* Buffer cannot be validated in two different lists */ + if(fenced_buf->vl && fenced_buf->vl != vl) + return PIPE_ERROR_RETRY; + + /* Do not validate if buffer is still mapped */ + if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { + /* TODO: wait for the thread that mapped the buffer to unmap it */ + return PIPE_ERROR_RETRY; + } + + if(fenced_buf->vl == vl && + (fenced_buf->validation_flags & flags) == flags) { + /* Nothing to do -- buffer already validated */ + return PIPE_OK; + } + + /* Final sanity checking */ + assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); + assert(!fenced_buf->mapcount); + + ret = pb_validate(fenced_buf->buffer, vl, flags); + if (ret != PIPE_OK) + return ret; + + fenced_buf->vl = vl; + fenced_buf->validation_flags |= flags; + + return PIPE_OK; +} + + +static void +fenced_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct fenced_buffer *fenced_buf; + struct fenced_buffer_list *fenced_list; + struct pb_fence_ops *ops; + + fenced_buf = fenced_buffer(buf); + fenced_list = fenced_buf->list; + ops = fenced_list->ops; + + if(fence == fenced_buf->fence) { + /* Nothing to do */ + return; + } + + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + pipe_mutex_lock(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_list, fenced_buf); + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + _fenced_buffer_add(fenced_buf); + } + pipe_mutex_unlock(fenced_list->mutex); + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; +} + + static void fenced_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -325,11 +406,13 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, } -const struct pb_vtbl +static const struct pb_vtbl fenced_buffer_vtbl = { fenced_buffer_destroy, fenced_buffer_map, fenced_buffer_unmap, + fenced_buffer_validate, + fenced_buffer_fence, fenced_buffer_get_base_buffer }; @@ -362,54 +445,8 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, } -void -buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - struct fenced_buffer *fenced_buf; - struct fenced_buffer_list *fenced_list; - struct pipe_winsys *winsys; - /* FIXME: receive this as a parameter */ - unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; - - /* This is a public function, so be extra cautious with the buffer passed, - * as happens frequently to receive null buffers, or pointer to buffers - * other than fenced buffers. */ - assert(buf); - if(!buf) - return; - assert(buf->vtbl == &fenced_buffer_vtbl); - if(buf->vtbl != &fenced_buffer_vtbl) - return; - - fenced_buf = fenced_buffer(buf); - fenced_list = fenced_buf->list; - winsys = fenced_list->winsys; - - if(!fence || fence == fenced_buf->fence) { - /* Handle the same fence case specially, not only because it is a fast - * path, but mostly to avoid serializing two writes with the same fence, - * as that would bring the hardware down to synchronous operation without - * any benefit. - */ - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; - return; - } - - pipe_mutex_lock(fenced_list->mutex); - if (fenced_buf->fence) - _fenced_buffer_remove(fenced_list, fenced_buf); - if (fence) { - winsys->fence_reference(winsys, &fenced_buf->fence, fence); - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; - _fenced_buffer_add(fenced_buf); - } - pipe_mutex_unlock(fenced_list->mutex); -} - - struct fenced_buffer_list * -fenced_buffer_list_create(struct pipe_winsys *winsys) +fenced_buffer_list_create(struct pb_fence_ops *ops) { struct fenced_buffer_list *fenced_list; @@ -417,7 +454,7 @@ fenced_buffer_list_create(struct pipe_winsys *winsys) if (!fenced_list) return NULL; - fenced_list->winsys = winsys; + fenced_list->ops = ops; LIST_INITHEAD(&fenced_list->delayed); @@ -456,6 +493,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) pipe_mutex_unlock(fenced_list->mutex); + fenced_list->ops->destroy(fenced_list->ops); + FREE(fenced_list); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index 50d5891bdb..c2b29e974b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -44,14 +44,14 @@ * Between the handle's destruction, and the fence signalling, the buffer is * stored in a fenced buffer list. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #ifndef PB_BUFFER_FENCED_H_ #define PB_BUFFER_FENCED_H_ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #ifdef __cplusplus @@ -59,7 +59,6 @@ extern "C" { #endif -struct pipe_winsys; struct pipe_buffer; struct pipe_fence_handle; @@ -70,12 +69,33 @@ struct pipe_fence_handle; struct fenced_buffer_list; -/** - * The fenced buffer's virtual function table. - * - * NOTE: Made public for debugging purposes. - */ -extern const struct pb_vtbl fenced_buffer_vtbl; +struct pb_fence_ops +{ + void (*destroy)( struct pb_fence_ops *ops ); + + /** Set ptr = fence, with reference counting */ + void (*fence_reference)( struct pb_fence_ops *ops, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence ); + + /** + * Checks whether the fence has been signalled. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_signalled)( struct pb_fence_ops *ops, + struct pipe_fence_handle *fence, + unsigned flag ); + + /** + * Wait for the fence to finish. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_finish)( struct pb_fence_ops *ops, + struct pipe_fence_handle *fence, + unsigned flag ); +}; /** @@ -84,7 +104,7 @@ extern const struct pb_vtbl fenced_buffer_vtbl; * See also fenced_bufmgr_create for a more convenient way to use this. */ struct fenced_buffer_list * -fenced_buffer_list_create(struct pipe_winsys *winsys); +fenced_buffer_list_create(struct pb_fence_ops *ops); /** @@ -108,17 +128,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced, struct pb_buffer *buffer); -/** - * Set a buffer's fence. - * - * NOTE: Although it takes a generic pb_buffer argument, it will fail - * on everything but buffers returned by fenced_buffer_create. - */ -void -buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index 1bf22a2ec0..282802b171 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -34,7 +34,7 @@ */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "pb_buffer.h" #include "pb_bufmgr.h" @@ -81,6 +81,24 @@ malloc_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +malloc_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + assert(0); + return PIPE_ERROR; +} + + +static void +malloc_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + assert(0); +} + + static void malloc_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -96,6 +114,8 @@ malloc_buffer_vtbl = { malloc_buffer_destroy, malloc_buffer_map, malloc_buffer_unmap, + malloc_buffer_validate, + malloc_buffer_fence, malloc_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index cafbee045a..fec8db91c7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -43,7 +43,7 @@ * - the fenced buffer manager, which will delay buffer destruction until the * the moment the card finishing processing it. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #ifndef PB_BUFMGR_H_ @@ -61,7 +61,6 @@ extern "C" { struct pb_desc; struct pipe_buffer; -struct pipe_winsys; /** @@ -163,6 +162,8 @@ pb_cache_manager_create(struct pb_manager *provider, unsigned usecs); +struct pb_fence_ops; + /** * Fenced buffer manager. * @@ -174,7 +175,7 @@ pb_cache_manager_create(struct pb_manager *provider, */ struct pb_manager * fenced_bufmgr_create(struct pb_manager *provider, - struct pipe_winsys *winsys); + struct pb_fence_ops *ops); struct pb_manager * @@ -183,6 +184,20 @@ pb_alt_manager_create(struct pb_manager *provider1, /** + * Ondemand buffer manager. + * + * Buffers are created in malloc'ed memory (fast and cached), and the constents + * is transfered to a buffer from the provider (typically in slow uncached + * memory) when there is an attempt to validate the buffer. + * + * Ideal for situations where one does not know before hand whether a given + * buffer will effectively be used by the hardware or not. + */ +struct pb_manager * +pb_ondemand_manager_create(struct pb_manager *provider); + + +/** * Debug buffer manager to detect buffer under- and overflows. * * Band size should be a multiple of the largest alignment diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c index c956924cc7..db67d46c56 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c @@ -34,7 +34,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 8f118874ec..29117efe9b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -29,14 +29,13 @@ * \file * Buffer cache. * - * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com> + * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> */ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" @@ -183,6 +182,25 @@ pb_cache_buffer_unmap(struct pb_buffer *_buf) } +static enum pipe_error +pb_cache_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_cache_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_fence(buf->buffer, fence); +} + + static void pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -198,6 +216,8 @@ pb_cache_buffer_vtbl = { pb_cache_buffer_destroy, pb_cache_buffer_map, pb_cache_buffer_unmap, + pb_cache_buffer_validate, + pb_cache_buffer_fence, pb_cache_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 1675e6e182..070bf3f517 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -29,13 +29,12 @@ * \file * Debug buffer manager to detect buffer under- and overflows. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -255,11 +254,35 @@ pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf, } +static enum pipe_error +pb_debug_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + + pb_debug_buffer_check(buf); + + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_debug_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + pb_fence(buf->buffer, fence); +} + + const struct pb_vtbl pb_debug_buffer_vtbl = { pb_debug_buffer_destroy, pb_debug_buffer_map, pb_debug_buffer_unmap, + pb_debug_buffer_validate, + pb_debug_buffer_fence, pb_debug_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index 633ee70a75..e352f5357b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -30,11 +30,11 @@ * \file * A buffer manager that wraps buffers in fenced buffers. * - * \author José Fonseca <jrfonseca@tungstengraphics.dot.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.dot.com> */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "pb_buffer.h" @@ -86,8 +86,7 @@ fenced_bufmgr_create_buffer(struct pb_manager *mgr, fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf); if(!fenced_buf) { - assert(buf->base.refcount == 1); - pb_destroy(buf); + pb_reference(&buf, NULL); } return fenced_buf; @@ -123,7 +122,7 @@ fenced_bufmgr_destroy(struct pb_manager *mgr) struct pb_manager * fenced_bufmgr_create(struct pb_manager *provider, - struct pipe_winsys *winsys) + struct pb_fence_ops *ops) { struct fenced_pb_manager *fenced_mgr; @@ -139,7 +138,7 @@ fenced_bufmgr_create(struct pb_manager *provider, fenced_mgr->base.flush = fenced_bufmgr_flush; fenced_mgr->provider = provider; - fenced_mgr->fenced_list = fenced_buffer_list_create(winsys); + fenced_mgr->fenced_list = fenced_buffer_list_create(ops); if(!fenced_mgr->fenced_list) { FREE(fenced_mgr); return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index fe80ca30ee..f3b1ca73b0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -29,12 +29,12 @@ * \file * Buffer manager using the old texture memory manager. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #include "pipe/p_defines.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" @@ -100,7 +100,7 @@ mm_buffer_destroy(struct pb_buffer *buf) assert(buf->base.refcount == 0); pipe_mutex_lock(mm->mutex); - mmFreeMem(mm_buf->block); + u_mmFreeMem(mm_buf->block); FREE(buf); pipe_mutex_unlock(mm->mutex); } @@ -124,6 +124,27 @@ mm_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +mm_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + return pb_validate(mm->buffer, vl, flags); +} + + +static void +mm_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + pb_fence(mm->buffer, fence); +} + + static void mm_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -141,6 +162,8 @@ mm_buffer_vtbl = { mm_buffer_destroy, mm_buffer_map, mm_buffer_unmap, + mm_buffer_validate, + mm_buffer_fence, mm_buffer_get_base_buffer }; @@ -154,8 +177,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, struct mm_buffer *mm_buf; /* We don't handle alignments larger then the one initially setup */ - assert(desc->alignment % (1 << mm->align2) == 0); - if(desc->alignment % (1 << mm->align2)) + assert(pb_check_alignment(desc->alignment, 1 << mm->align2)); + if(!pb_check_alignment(desc->alignment, 1 << mm->align2)) return NULL; pipe_mutex_lock(mm->mutex); @@ -175,14 +198,14 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, mm_buf->mgr = mm; - mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); + mm_buf->block = u_mmAllocMem(mm->heap, size, mm->align2, 0); if(!mm_buf->block) { debug_printf("warning: heap full\n"); #if 0 mmDumpMemInfo(mm->heap); #endif - mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); + mm_buf->block = u_mmAllocMem(mm->heap, size, mm->align2, 0); if(!mm_buf->block) { FREE(mm_buf); pipe_mutex_unlock(mm->mutex); @@ -213,7 +236,7 @@ mm_bufmgr_destroy(struct pb_manager *mgr) pipe_mutex_lock(mm->mutex); - mmDestroy(mm->heap); + u_mmDestroy(mm->heap); pb_unmap(mm->buffer); pb_reference(&mm->buffer, NULL); @@ -254,7 +277,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, if(!mm->map) goto failure; - mm->heap = mmInit(0, size); + mm->heap = u_mmInit(0, size); if (!mm->heap) goto failure; @@ -262,7 +285,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, failure: if(mm->heap) - mmDestroy(mm->heap); + u_mmDestroy(mm->heap); if(mm->map) pb_unmap(mm->buffer); if(mm) diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c new file mode 100644 index 0000000000..3d9c7bba0b --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c @@ -0,0 +1,303 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * A variation of malloc buffers which get transferred to real graphics memory + * when there is an attempt to validate them. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct pb_ondemand_manager; + + +struct pb_ondemand_buffer +{ + struct pb_buffer base; + + struct pb_ondemand_manager *mgr; + + /** Regular malloc'ed memory */ + void *data; + unsigned mapcount; + + /** Real buffer */ + struct pb_buffer *buffer; + size_t size; + struct pb_desc desc; +}; + + +struct pb_ondemand_manager +{ + struct pb_manager base; + + struct pb_manager *provider; +}; + + +extern const struct pb_vtbl pb_ondemand_buffer_vtbl; + +static INLINE struct pb_ondemand_buffer * +pb_ondemand_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &pb_ondemand_buffer_vtbl); + return (struct pb_ondemand_buffer *)buf; +} + +static INLINE struct pb_ondemand_manager * +pb_ondemand_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_ondemand_manager *)mgr; +} + + +static void +pb_ondemand_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + pb_reference(&buf->buffer, NULL); + + align_free(buf->data); + + FREE(buf); +} + + +static void * +pb_ondemand_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + return pb_map(buf->buffer, flags); + } + else { + assert(buf->data); + ++buf->mapcount; + return buf->data; + } +} + + +static void +pb_ondemand_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + pb_unmap(buf->buffer); + } + else { + assert(buf->data); + assert(buf->mapcount); + if(buf->mapcount) + --buf->mapcount; + } +} + + +static enum pipe_error +pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf) +{ + if(!buf->buffer) { + struct pb_manager *provider = buf->mgr->provider; + uint8_t *map; + + assert(!buf->mapcount); + + buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc); + if(!buf->buffer) + return PIPE_ERROR_OUT_OF_MEMORY; + + map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(!map) { + pb_reference(&buf->buffer, NULL); + return PIPE_ERROR; + } + + memcpy(map, buf->data, buf->size); + + pb_unmap(buf->buffer); + + if(!buf->mapcount) { + FREE(buf->data); + buf->data = NULL; + } + } + + return PIPE_OK; +} + +static enum pipe_error +pb_ondemand_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + enum pipe_error ret; + + assert(!buf->mapcount); + if(buf->mapcount) + return PIPE_ERROR; + + ret = pb_ondemand_buffer_instantiate(buf); + if(ret != PIPE_OK) + return ret; + + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_ondemand_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + assert(buf->buffer); + if(!buf->buffer) + return; + + pb_fence(buf->buffer, fence); +} + + +static void +pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) { + assert(0); + *base_buf = &buf->base; + *offset = 0; + return; + } + + pb_get_base_buffer(buf->buffer, base_buf, offset); +} + + +const struct pb_vtbl +pb_ondemand_buffer_vtbl = { + pb_ondemand_buffer_destroy, + pb_ondemand_buffer_map, + pb_ondemand_buffer_unmap, + pb_ondemand_buffer_validate, + pb_ondemand_buffer_fence, + pb_ondemand_buffer_get_base_buffer +}; + + +static struct pb_buffer * +pb_ondemand_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + struct pb_ondemand_buffer *buf; + + buf = CALLOC_STRUCT(pb_ondemand_buffer); + if(!buf) + return NULL; + + buf->base.base.refcount = 1; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + buf->base.base.size = size; + buf->base.vtbl = &pb_ondemand_buffer_vtbl; + + buf->mgr = mgr; + + buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); + if(!buf->data) { + FREE(buf); + return NULL; + } + + buf->size = size; + buf->desc = *desc; + + return &buf->base; +} + + +static void +pb_ondemand_manager_flush(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + mgr->provider->flush(mgr->provider); +} + + +static void +pb_ondemand_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + FREE(mgr); +} + + +struct pb_manager * +pb_ondemand_manager_create(struct pb_manager *provider) +{ + struct pb_ondemand_manager *mgr; + + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_ondemand_manager); + if(!mgr) + return NULL; + + mgr->base.destroy = pb_ondemand_manager_destroy; + mgr->base.create_buffer = pb_ondemand_manager_create_buffer; + mgr->base.flush = pb_ondemand_manager_flush; + + mgr->provider = provider; + + return &mgr->base; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 61ac291ed7..12447acfd9 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -30,13 +30,13 @@ * \file * Batch buffer pool management. * - * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com> + * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> */ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -138,6 +138,27 @@ pool_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +pool_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + return pb_validate(pool->buffer, vl, flags); +} + + +static void +pool_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + pb_fence(pool->buffer, fence); +} + + static void pool_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -155,6 +176,8 @@ pool_buffer_vtbl = { pool_buffer_destroy, pool_buffer_map, pool_buffer_unmap, + pool_buffer_validate, + pool_buffer_fence, pool_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 2a80154920..a3259351b9 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -38,7 +38,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -248,6 +248,25 @@ pb_slab_buffer_unmap(struct pb_buffer *_buf) } +static enum pipe_error +pb_slab_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + return pb_validate(buf->slab->bo, vl, flags); +} + + +static void +pb_slab_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + pb_fence(buf->slab->bo, fence); +} + + static void pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -264,6 +283,8 @@ pb_slab_buffer_vtbl = { pb_slab_buffer_destroy, pb_slab_buffer_map, pb_slab_buffer_unmap, + pb_slab_buffer_validate, + pb_slab_buffer_fence, pb_slab_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 1e54fc39d4..150fd50618 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -36,7 +36,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" #include "util/u_memory.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" @@ -46,9 +46,16 @@ #define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */ +struct pb_validate_entry +{ + struct pb_buffer *buf; + unsigned flags; +}; + + struct pb_validate { - struct pb_buffer **buffers; + struct pb_validate_entry *entries; unsigned used; unsigned size; }; @@ -56,43 +63,50 @@ struct pb_validate enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf) + struct pb_buffer *buf, + unsigned flags) { assert(buf); if(!buf) return PIPE_ERROR; + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + /* We only need to store one reference for each buffer, so avoid storing - * consecutive references for the same buffer. It might not be the more - * common pasttern, but it is easy to implement. + * consecutive references for the same buffer. It might not be the most + * common pattern, but it is easy to implement. */ - if(vl->used && vl->buffers[vl->used - 1] == buf) { + if(vl->used && vl->entries[vl->used - 1].buf == buf) { + vl->entries[vl->used - 1].flags |= flags; return PIPE_OK; } /* Grow the table */ if(vl->used == vl->size) { unsigned new_size; - struct pb_buffer **new_buffers; + struct pb_validate_entry *new_entries; new_size = vl->size * 2; if(!new_size) return PIPE_ERROR_OUT_OF_MEMORY; - new_buffers = (struct pb_buffer **)REALLOC(vl->buffers, - vl->size*sizeof(struct pb_buffer *), - new_size*sizeof(struct pb_buffer *)); - if(!new_buffers) + new_entries = (struct pb_validate_entry *)REALLOC(vl->entries, + vl->size*sizeof(struct pb_validate_entry), + new_size*sizeof(struct pb_validate_entry)); + if(!new_entries) return PIPE_ERROR_OUT_OF_MEMORY; - memset(new_buffers + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_buffer *)); + memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry)); vl->size = new_size; - vl->buffers = new_buffers; + vl->entries = new_entries; } - assert(!vl->buffers[vl->used]); - pb_reference(&vl->buffers[vl->used], buf); + assert(!vl->entries[vl->used].buf); + pb_reference(&vl->entries[vl->used].buf, buf); + vl->entries[vl->used].flags = flags; ++vl->used; return PIPE_OK; @@ -100,10 +114,36 @@ pb_validate_add_buffer(struct pb_validate *vl, enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data) +{ + unsigned i; + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = callback(vl->entries[i].buf, data); + if(ret != PIPE_OK) + return ret; + } + return PIPE_OK; +} + + +enum pipe_error pb_validate_validate(struct pb_validate *vl) { - /* FIXME: go through each buffer, ensure its not mapped, its address is - * available -- requires a new pb_buffer interface */ + unsigned i; + + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags); + if(ret != PIPE_OK) { + while(i--) + pb_validate(vl->entries[i].buf, NULL, 0); + return ret; + } + } + return PIPE_OK; } @@ -114,8 +154,8 @@ pb_validate_fence(struct pb_validate *vl, { unsigned i; for(i = 0; i < vl->used; ++i) { - buffer_fence(vl->buffers[i], fence); - pb_reference(&vl->buffers[i], NULL); + pb_fence(vl->entries[i].buf, fence); + pb_reference(&vl->entries[i].buf, NULL); } vl->used = 0; } @@ -126,8 +166,8 @@ pb_validate_destroy(struct pb_validate *vl) { unsigned i; for(i = 0; i < vl->used; ++i) - pb_reference(&vl->buffers[i], NULL); - FREE(vl->buffers); + pb_reference(&vl->entries[i].buf, NULL); + FREE(vl->entries); FREE(vl); } @@ -142,8 +182,8 @@ pb_validate_create() return NULL; vl->size = PB_VALIDATE_INITIAL_SIZE; - vl->buffers = (struct pb_buffer **)CALLOC(vl->size, sizeof(struct pb_buffer *)); - if(!vl->buffers) { + vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry)); + if(!vl->entries) { FREE(vl); return NULL; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h index 3db1d5330b..dfb84df1ce 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.h +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h @@ -58,7 +58,13 @@ struct pb_validate; enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf); + struct pb_buffer *buf, + unsigned flags); + +enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data); /** * Validate all buffers for hardware access. @@ -71,7 +77,7 @@ pb_validate_validate(struct pb_validate *vl); /** * Fence all buffers and clear the list. * - * Should be called right before issuing commands to the hardware. + * Should be called right after issuing commands to the hardware. */ void pb_validate_fence(struct pb_validate *vl, diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c deleted file mode 100644 index 28d137dbc4..0000000000 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ /dev/null @@ -1,170 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * Implementation of client buffer (also designated as "user buffers"), which - * are just state-tracker owned data masqueraded as buffers. - * - * \author José Fonseca <jrfonseca@tungstengraphics.com> - */ - - -#include "pipe/p_winsys.h" -#include "util/u_memory.h" - -#include "pb_buffer.h" - - -/** - * User buffers are special buffers that initially reference memory - * held by the user but which may if necessary copy that memory into - * device memory behind the scenes, for submission to hardware. - * - * These are particularly useful when the referenced data is never - * submitted to hardware at all, in the particular case of software - * vertex processing. - */ -struct pb_user_buffer -{ - struct pb_buffer base; - void *data; -}; - - -extern const struct pb_vtbl pb_user_buffer_vtbl; - - -static INLINE struct pb_user_buffer * -pb_user_buffer(struct pb_buffer *buf) -{ - assert(buf); - assert(buf->vtbl == &pb_user_buffer_vtbl); - return (struct pb_user_buffer *)buf; -} - - -static void -pb_user_buffer_destroy(struct pb_buffer *buf) -{ - assert(buf); - FREE(buf); -} - - -static void * -pb_user_buffer_map(struct pb_buffer *buf, - unsigned flags) -{ - return pb_user_buffer(buf)->data; -} - - -static void -pb_user_buffer_unmap(struct pb_buffer *buf) -{ - /* No-op */ -} - - -static void -pb_user_buffer_get_base_buffer(struct pb_buffer *buf, - struct pb_buffer **base_buf, - unsigned *offset) -{ - *base_buf = buf; - *offset = 0; -} - - -const struct pb_vtbl -pb_user_buffer_vtbl = { - pb_user_buffer_destroy, - pb_user_buffer_map, - pb_user_buffer_unmap, - pb_user_buffer_get_base_buffer -}; - - -static struct pipe_buffer * -pb_winsys_user_buffer_create(struct pipe_winsys *winsys, - void *data, - unsigned bytes) -{ - struct pb_user_buffer *buf = CALLOC_STRUCT(pb_user_buffer); - - if(!buf) - return NULL; - - buf->base.base.refcount = 1; - buf->base.base.size = bytes; - buf->base.base.alignment = 0; - buf->base.base.usage = 0; - - buf->base.vtbl = &pb_user_buffer_vtbl; - buf->data = data; - - return &buf->base.base; -} - - -static void * -pb_winsys_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags) -{ - (void)winsys; - return pb_map(pb_buffer(buf), flags); -} - - -static void -pb_winsys_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - (void)winsys; - pb_unmap(pb_buffer(buf)); -} - - -static void -pb_winsys_buffer_destroy(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - (void)winsys; - pb_destroy(pb_buffer(buf)); -} - - -void -pb_init_winsys(struct pipe_winsys *winsys) -{ - winsys->user_buffer_create = pb_winsys_user_buffer_create; - winsys->buffer_map = pb_winsys_buffer_map; - winsys->buffer_unmap = pb_winsys_buffer_unmap; - winsys->buffer_destroy = pb_winsys_buffer_destroy; -} diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index 39b8a4dbd7..252dc5274a 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ rtasm_cpu.c \ rtasm_execmem.c \ rtasm_x86sse.c \ + rtasm_ppc.c \ rtasm_ppc_spe.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript index 8ea25922aa..eb48368acc 100644 --- a/src/gallium/auxiliary/rtasm/SConscript +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -6,6 +6,7 @@ rtasm = env.ConvenienceLibrary( 'rtasm_cpu.c', 'rtasm_execmem.c', 'rtasm_x86sse.c', + 'rtasm_ppc.c', 'rtasm_ppc_spe.c', ]) diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c index 5499018b21..03bdd47238 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "rtasm_cpu.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index f16191cb61..5acc5bcb7b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -31,19 +31,20 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "rtasm_execmem.h" -#if defined(__linux__) +#if defined(PIPE_OS_LINUX) + /* * Allocate a large block of memory which can hold code then dole it out * in pieces by means of the generic memory manager code. -*/ + */ #include <unistd.h> #include <sys/mman.h> @@ -62,7 +63,7 @@ static void init_heap(void) { if (!exec_heap) - exec_heap = mmInit( 0, EXEC_HEAP_SIZE ); + exec_heap = u_mmInit( 0, EXEC_HEAP_SIZE ); if (!exec_mem) exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, @@ -83,7 +84,7 @@ rtasm_exec_malloc(size_t size) if (exec_heap) { size = (size + 31) & ~31; /* next multiple of 32 bytes */ - block = mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */ + block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */ } if (block) @@ -103,17 +104,17 @@ rtasm_exec_free(void *addr) pipe_mutex_lock(exec_mutex); if (exec_heap) { - struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); + struct mem_block *block = u_mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); if (block) - mmFreeMem(block); + u_mmFreeMem(block); } pipe_mutex_unlock(exec_mutex); } -#else +#else /* PIPE_OS_LINUX */ /* * Just use regular memory. @@ -133,4 +134,4 @@ rtasm_exec_free(void *addr) } -#endif +#endif /* PIPE_OS_LINUX */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c new file mode 100644 index 0000000000..e3586482db --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -0,0 +1,1077 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. + * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf + * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf + * + * Other PPC refs: + * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2 + * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html + * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf + * + * \author Brian Paul + */ + + +#include <stdio.h> +#include "util/u_memory.h" +#include "util/u_debug.h" +#include "rtasm_execmem.h" +#include "rtasm_ppc.h" + + +void +ppc_init_func(struct ppc_function *p) +{ + uint i; + + memset(p, 0, sizeof(*p)); + + p->num_inst = 0; + p->max_inst = 100; /* first guess at buffer size */ + p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE); + p->reg_used = 0x0; + p->fp_used = 0x0; + p->vec_used = 0x0; + + p->print = FALSE; + p->indent = 0; + + /* only allow using gp registers 3..12 for now */ + for (i = 0; i < 3; i++) + ppc_reserve_register(p, i); + for (i = 12; i < PPC_NUM_REGS; i++) + ppc_reserve_register(p, i); +} + + +void +ppc_release_func(struct ppc_function *p) +{ + assert(p->num_inst <= p->max_inst); + if (p->store != NULL) { + rtasm_exec_free(p->store); + } + p->store = NULL; +} + + +uint +ppc_num_instructions(const struct ppc_function *p) +{ + return p->num_inst; +} + + +void (*ppc_get_func(struct ppc_function *p))(void) +{ +#if 0 + DUMP_END(); + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + + if (p->store == p->error_overflow) + return (void (*)(void)) NULL; + else +#endif + return (void (*)(void)) p->store; +} + + +void +ppc_dump_func(const struct ppc_function *p) +{ + uint i; + for (i = 0; i < p->num_inst; i++) { + debug_printf("%3u: 0x%08x\n", i, p->store[i]); + } +} + + +void +ppc_print_code(struct ppc_function *p, boolean enable) +{ + p->print = enable; +} + + +void +ppc_indent(struct ppc_function *p, int spaces) +{ + p->indent += spaces; +} + + +static void +indent(const struct ppc_function *p) +{ + int i; + for (i = 0; i < p->indent; i++) { + putchar(' '); + } +} + + +void +ppc_comment(struct ppc_function *p, int rel_indent, const char *s) +{ + if (p->print) { + p->indent += rel_indent; + indent(p); + p->indent -= rel_indent; + printf("# %s\n", s); + } +} + + +/** + * Mark a register as being unavailable. + */ +int +ppc_reserve_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + p->reg_used |= (1 << reg); + return reg; +} + + +/** + * Allocate a general purpose register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->reg_used & mask) == 0) { + p->reg_used |= mask; + return i; + } + } + printf("OUT OF PPC registers!\n"); + return -1; +} + + +/** + * Mark the given general purpose register as "unallocated". + */ +void +ppc_release_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + assert(p->reg_used & (1 << reg)); + p->reg_used &= ~(1 << reg); +} + + +/** + * Allocate a floating point register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_fp_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_FP_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->fp_used & mask) == 0) { + p->fp_used |= mask; + return i; + } + } + printf("OUT OF PPC FP registers!\n"); + return -1; +} + + +/** + * Mark the given floating point register as "unallocated". + */ +void +ppc_release_fp_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_FP_REGS); + assert(p->fp_used & (1 << reg)); + p->fp_used &= ~(1 << reg); +} + + +/** + * Allocate a vector register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_vec_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_VEC_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->vec_used & mask) == 0) { + p->vec_used |= mask; + return i; + } + } + printf("OUT OF PPC VEC registers!\n"); + return -1; +} + + +/** + * Mark the given vector register as "unallocated". + */ +void +ppc_release_vec_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_VEC_REGS); + assert(p->vec_used & (1 << reg)); + p->vec_used &= ~(1 << reg); +} + + +/** + * Append instruction to instruction buffer. Grow buffer if out of room. + */ +static void +emit_instruction(struct ppc_function *p, uint32_t inst_bits) +{ + if (!p->store) + return; /* out of memory, drop the instruction */ + + if (p->num_inst == p->max_inst) { + /* allocate larger buffer */ + uint32_t *newbuf; + p->max_inst *= 2; /* 2x larger */ + newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE); + if (newbuf) { + memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE); + } + rtasm_exec_free(p->store); + p->store = newbuf; + if (!p->store) { + /* out of memory */ + p->num_inst = 0; + return; + } + } + + p->store[p->num_inst++] = inst_bits; +} + + +union vx_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned op2:11; + } inst; +}; + +static INLINE void +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, + const char *format, boolean transpose) +{ + union vx_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.op2 = op2; + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + if (transpose) + printf(format, vD, vB, vA); + else + printf(format, vD, vA, vB); + } +} + + +union vxr_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned rC:1; + unsigned op2:10; + } inst; +}; + +static INLINE void +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, + const char *format) +{ + union vxr_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.rC = 0; + inst.inst.op2 = op2; + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vD, vA, vB); + } +} + + +union va_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned vC:5; + unsigned op2:6; + } inst; +}; + +static INLINE void +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC, + const char *format) +{ + union va_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.vC = vC; + inst.inst.op2 = op2; + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vD, vA, vB, vC); + } +} + + +union i_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned li:24; + unsigned aa:1; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk) +{ + union i_inst inst; + inst.inst.op = op; + inst.inst.li = li; + inst.inst.aa = aa; + inst.inst.lk = lk; + emit_instruction(p, inst.bits); +} + + +union xl_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned bo:5; + unsigned bi:5; + unsigned unused:3; + unsigned bh:2; + unsigned op2:10; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh, + uint op2, uint lk) +{ + union xl_inst inst; + inst.inst.op = op; + inst.inst.bo = bo; + inst.inst.bi = bi; + inst.inst.unused = 0x0; + inst.inst.bh = bh; + inst.inst.op2 = op2; + inst.inst.lk = lk; + emit_instruction(p, inst.bits); +} + +static INLINE void +dump_xl(const char *name, uint inst) +{ + union xl_inst i; + + i.bits = inst; + debug_printf("%s = 0x%08x\n", name, inst); + debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op); + debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo); + debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi); + debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused); + debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh); + debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2); + debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk); +} + + +union x_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vrs:5; + unsigned ra:5; + unsigned rb:5; + unsigned op2:10; + unsigned unused:1; + } inst; +}; + +static INLINE void +emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2, + const char *format) +{ + union x_inst inst; + inst.inst.op = op; + inst.inst.vrs = vrs; + inst.inst.ra = ra; + inst.inst.rb = rb; + inst.inst.op2 = op2; + inst.inst.unused = 0x0; + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vrs, ra, rb); + } +} + + +union d_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned si:16; + } inst; +}; + +static INLINE void +emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si, + const char *format, boolean transpose) +{ + union d_inst inst; + assert(si >= -32768); + assert(si <= 32767); + inst.inst.op = op; + inst.inst.rt = rt; + inst.inst.ra = ra; + inst.inst.si = (unsigned) (si & 0xffff); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + if (transpose) + printf(format, rt, si, ra); + else + printf(format, rt, ra, si); + } +} + + +union a_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned frt:5; + unsigned fra:5; + unsigned frb:5; + unsigned unused:5; + unsigned op2:5; + unsigned rc:1; + } inst; +}; + +static INLINE void +emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, + uint rc, const char *format) +{ + union a_inst inst; + inst.inst.op = op; + inst.inst.frt = frt; + inst.inst.fra = fra; + inst.inst.frb = frb; + inst.inst.unused = 0x0; + inst.inst.op2 = op2; + inst.inst.rc = rc; + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, frt, fra, frb); + } +} + + +union xo_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned rb:5; + unsigned oe:1; + unsigned op2:9; + unsigned rc:1; + } inst; +}; + +static INLINE void +emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, + uint op2, uint rc, const char *format) +{ + union xo_inst inst; + inst.inst.op = op; + inst.inst.rt = rt; + inst.inst.ra = ra; + inst.inst.rb = rb; + inst.inst.oe = oe; + inst.inst.op2 = op2; + inst.inst.rc = rc; + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, rt, ra, rb); + } +} + + + + + +/** + ** float vector arithmetic + **/ + +/** vector float add */ +void +ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 10, vD, vA, vB, "vaddfp\t%u, v%u, v%u\n", FALSE); +} + +/** vector float substract */ +void +ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE); +} + +/** vector float min */ +void +ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE); +} + +/** vector float max */ +void +ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE); +} + +/** vector float mult add: vD = vA * vB + vC */ +void +ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + /* note arg order */ + emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n"); +} + +/** vector float negative mult subtract: vD = vA - vB * vC */ +void +ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + /* note arg order */ + emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n"); +} + +/** vector float compare greater than */ +void +ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u"); +} + +/** vector float compare greater than or equal to */ +void +ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u"); +} + +/** vector float compare equal */ +void +ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u"); +} + +/** vector float 2^x */ +void +ppc_vexptefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE); +} + +/** vector float log2(x) */ +void +ppc_vlogefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE); +} + +/** vector float reciprocol */ +void +ppc_vrefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE); +} + +/** vector float reciprocol sqrt estimate */ +void +ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE); +} + +/** vector float round to negative infinity */ +void +ppc_vrfim(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE); +} + +/** vector float round to positive infinity */ +void +ppc_vrfip(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE); +} + +/** vector float round to nearest int */ +void +ppc_vrfin(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE); +} + +/** vector float round to int toward zero */ +void +ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE); +} + +/** vector store: store vR at mem[rA+rB] */ +void +ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB) +{ + emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n"); +} + +/** vector load: vR = mem[rA+rB] */ +void +ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB) +{ + emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n"); +} + +/** load vector element word: vR = mem_word[ra+rb] */ +void +ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB) +{ + emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n"); +} + + + + +/** + ** vector bitwise operations + **/ + +/** vector and */ +void +ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE); +} + +/** vector and complement */ +void +ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE); +} + +/** vector or */ +void +ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE); +} + +/** vector nor */ +void +ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE); +} + +/** vector xor */ +void +ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE); +} + +/** Pseudo-instruction: vector move */ +void +ppc_vmove(struct ppc_function *p, uint vD, uint vA) +{ + boolean print = p->print; + p->print = FALSE; + ppc_vor(p, vD, vA, vA); + if (print) { + indent(p); + printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA); + } + p->print = print; +} + +/** Set vector register to {0,0,0,0} */ +void +ppc_vzero(struct ppc_function *p, uint vr) +{ + boolean print = p->print; + p->print = FALSE; + ppc_vxor(p, vr, vr, vr); + if (print) { + indent(p); + printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr); + } + p->print = print; +} + + + + +/** + ** Vector shuffle / select / splat / etc + **/ + +/** vector permute */ +void +ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 43, vD, vA, vB, vC, "vperm\tr%u, r%u, r%u, r%u"); +} + +/** vector select */ +void +ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 42, vD, vA, vB, vC, "vsel\tr%u, r%u, r%u, r%u"); +} + +/** vector splat byte */ +void +ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 42, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE); +} + +/** vector splat half word */ +void +ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE); +} + +/** vector splat word */ +void +ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE); +} + +/** vector splat signed immediate word */ +void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm) +{ + assert(imm >= -16); + assert(imm < 15); + emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE); +} + +/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE); +} + + + + +/** + ** integer arithmetic + **/ + +/** rt = ra + imm */ +void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE); +} + +/** rt = ra + (imm << 16) */ +void +ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE); +} + +/** rt = ra + rb */ +void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n"); +} + +/** rt = ra AND ra */ +void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n"); /* note argument order */ +} + +/** rt = ra AND imm */ +void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + /* note argument order */ + emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE); +} + +/** rt = ra OR ra */ +void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n"); /* note argument order */ +} + +/** rt = ra OR imm */ +void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + /* note argument order */ + emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE); +} + +/** rt = ra XOR ra */ +void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n"); /* note argument order */ +} + +/** rt = ra XOR imm */ +void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + /* note argument order */ + emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE); +} + +/** pseudo instruction: move: rt = ra */ +void +ppc_mr(struct ppc_function *p, uint rt, uint ra) +{ + ppc_or(p, rt, ra, ra); +} + +/** pseudo instruction: load immediate: rt = imm */ +void +ppc_li(struct ppc_function *p, uint rt, int imm) +{ + boolean print = p->print; + p->print = FALSE; + ppc_addi(p, rt, 0, imm); + if (print) { + indent(p); + printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm); + } + p->print = print; +} + +/** rt = imm << 16 */ +void +ppc_lis(struct ppc_function *p, uint rt, int imm) +{ + ppc_addis(p, rt, 0, imm); +} + +/** rt = imm */ +void +ppc_load_int(struct ppc_function *p, uint rt, int imm) +{ + ppc_lis(p, rt, (imm >> 16)); /* rt = imm >> 16 */ + ppc_ori(p, rt, rt, (imm & 0xffff)); /* rt = rt | (imm & 0xffff) */ +} + + + + +/** + ** integer load/store + **/ + +/** store rs at memory[(ra)+d], + * then update ra = (ra)+d + */ +void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE); +} + +/** store rs at memory[(ra)+d] */ +void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE); +} + +/** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero. */ +void +ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) +{ + emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE); +} + + + +/** + ** Float (non-vector) arithmetic + **/ + +/** add: frt = fra + frb */ +void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n"); +} + +/** sub: frt = fra - frb */ +void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n"); +} + +/** convert to int: rt = (int) ra */ +void +ppc_fctiwz(struct ppc_function *p, uint rt, uint fra) +{ + emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n"); +} + +/** store frs at mem[(ra)+offset] */ +void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset) +{ + emit_d(p, 52, frs, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE); +} + +/** store frs at mem[(ra)+(rb)] */ +void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb) +{ + emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tr%u, r%u, r%u\n"); +} + +/** load frt = mem[(ra)+offset] */ +void +ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset) +{ + emit_d(p, 48, frt, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE); +} + + + + + +/** + ** branch instructions + **/ + +/** BLR: Branch to link register (p. 35) */ +void +ppc_blr(struct ppc_function *p) +{ + emit_i(p, 18, 0, 0, 1); + if (p->print) { + indent(p); + printf("blr\n"); + } +} + +/** Branch Conditional to Link Register (p. 36) */ +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg) +{ + emit_xl(p, 19, condOp, condReg, branchHint, 16, 0); + if (p->print) { + indent(p); + printf("bclr\t%u %u %u\n", condOp, branchHint, condReg); + } +} + +/** Pseudo instruction: return from subroutine */ +void +ppc_return(struct ppc_function *p) +{ + ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0); +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h new file mode 100644 index 0000000000..93e5f5187d --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -0,0 +1,342 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. + * \author Brian Paul + */ + + +#ifndef RTASM_PPC_H +#define RTASM_PPC_H + + +#include "pipe/p_compiler.h" + + +#define PPC_INST_SIZE 4 /**< 4 bytes / instruction */ + +#define PPC_NUM_REGS 32 +#define PPC_NUM_FP_REGS 32 +#define PPC_NUM_VEC_REGS 32 + +/** Stack pointer register */ +#define PPC_REG_SP 1 + +/** Branch conditions */ +#define BRANCH_COND_ALWAYS 0x14 /* binary 1z1zz (z=ignored) */ + +/** Branch hints */ +#define BRANCH_HINT_SUB_RETURN 0x0 /* binary 00 */ + + +struct ppc_function +{ + uint32_t *store; /**< instruction buffer */ + uint num_inst; + uint max_inst; + uint32_t reg_used; /** used/free general-purpose registers bitmask */ + uint32_t fp_used; /** used/free floating point registers bitmask */ + uint32_t vec_used; /** used/free vector registers bitmask */ + int indent; + boolean print; +}; + + + +extern void ppc_init_func(struct ppc_function *p); +extern void ppc_release_func(struct ppc_function *p); +extern uint ppc_num_instructions(const struct ppc_function *p); +extern void (*ppc_get_func( struct ppc_function *p ))( void ); +extern void ppc_dump_func(const struct ppc_function *p); + +extern void ppc_print_code(struct ppc_function *p, boolean enable); +extern void ppc_indent(struct ppc_function *p, int spaces); +extern void ppc_comment(struct ppc_function *p, int rel_indent, const char *s); + +extern int ppc_reserve_register(struct ppc_function *p, int reg); +extern int ppc_allocate_register(struct ppc_function *p); +extern void ppc_release_register(struct ppc_function *p, int reg); +extern int ppc_allocate_fp_register(struct ppc_function *p); +extern void ppc_release_fp_register(struct ppc_function *p, int reg); +extern int ppc_allocate_vec_register(struct ppc_function *p); +extern void ppc_release_vec_register(struct ppc_function *p, int reg); + + + +/** + ** float vector arithmetic + **/ + +/** vector float add */ +extern void +ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB); + +/** vector float substract */ +extern void +ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float min */ +extern void +ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float max */ +extern void +ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float mult add: vD = vA * vB + vC */ +extern void +ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector float negative mult subtract: vD = vA - vB * vC */ +extern void +ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector float compare greater than */ +extern void +ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare greater than or equal to */ +extern void +ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare equal */ +extern void +ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float 2^x */ +extern void +ppc_vexptefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float log2(x) */ +extern void +ppc_vlogefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float reciprocol */ +extern void +ppc_vrefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float reciprocol sqrt estimate */ +extern void +ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to negative infinity */ +extern void +ppc_vrfim(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to positive infinity */ +extern void +ppc_vrfip(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to nearest int */ +extern void +ppc_vrfin(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to int toward zero */ +extern void +ppc_vrfiz(struct ppc_function *p, uint vD, uint vB); + + +/** vector store: store vR at mem[vA+vB] */ +extern void +ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB); + +/** vector load: vR = mem[vA+vB] */ +extern void +ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); + +/** load vector element word: vR = mem_word[vA+vB] */ +extern void +ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB); + + + +/** + ** vector bitwise operations + **/ + + +/** vector and */ +extern void +ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector and complement */ +extern void +ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector or */ +extern void +ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector nor */ +extern void +ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector xor */ +extern void +ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** Pseudo-instruction: vector move */ +extern void +ppc_vmove(struct ppc_function *p, uint vD, uint vA); + +/** Set vector register to {0,0,0,0} */ +extern void +ppc_vzero(struct ppc_function *p, uint vr); + + + +/** + ** Vector shuffle / select / splat / etc + **/ + +/** vector permute */ +extern void +ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector select */ +extern void +ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector splat byte */ +extern void +ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat half word */ +extern void +ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat word */ +extern void +ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat signed immediate word */ +extern void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm); + +/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +extern void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB); + + + +/** + ** scalar arithmetic + **/ + +extern void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_mr(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_li(struct ppc_function *p, uint rt, int imm); + +extern void +ppc_lis(struct ppc_function *p, uint rt, int imm); + +extern void +ppc_load_int(struct ppc_function *p, uint rt, int imm); + + + +/** + ** scalar load/store + **/ + +extern void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_lwz(struct ppc_function *p, uint rs, uint ra, int d); + + + +/** + ** Float (non-vector) arithmetic + **/ + +extern void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fctiwz(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset); + +extern void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb); + +extern void +ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset); + + + +/** + ** branch instructions + **/ + +extern void +ppc_blr(struct ppc_function *p); + +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg); + +extern void +ppc_return(struct ppc_function *p); + + +#endif /* RTASM_PPC_H */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index a04cc6c4ff..53a0e722cf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -27,12 +27,16 @@ * Real-time assembly generation interface for Cell B.E. SPEs. * * \author Ian Romanick <idr@us.ibm.com> + * \author Brian Paul */ + +#include <stdio.h> #include "pipe/p_compiler.h" #include "util/u_memory.h" #include "rtasm_ppc_spe.h" + #ifdef GALLIUM_CELL /** * SPE instruction types @@ -143,21 +147,91 @@ union spe_inst_RI18 { /*@}*/ -static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB) +static void +indent(const struct spe_function *p) +{ + int i; + for (i = 0; i < p->indent; i++) { + putchar(' '); + } +} + + +static const char * +rem_prefix(const char *longname) +{ + return longname + 4; +} + + +static const char * +reg_name(int reg) +{ + switch (reg) { + case SPE_REG_SP: + return "$sp"; + case SPE_REG_RA: + return "$lr"; + default: + { + /* cycle through four buffers to handle multiple calls per printf */ + static char buf[4][10]; + static int b = 0; + b = (b + 1) % 4; + sprintf(buf[b], "$%d", reg); + return buf[b]; + } + } +} + + +static void +emit_instruction(struct spe_function *p, uint32_t inst_bits) +{ + if (!p->store) + return; /* out of memory, drop the instruction */ + + if (p->num_inst == p->max_inst) { + /* allocate larger buffer */ + uint32_t *newbuf; + p->max_inst *= 2; /* 2x larger */ + newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16); + if (newbuf) { + memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE); + } + align_free(p->store); + p->store = newbuf; + if (!p->store) { + /* out of memory */ + p->num_inst = 0; + return; + } + } + + p->store[p->num_inst++] = inst_bits; +} + + + +static void emit_RR(struct spe_function *p, unsigned op, int rT, + int rA, int rB, const char *name) { union spe_inst_RR inst; inst.inst.op = op; inst.inst.rB = rB; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, %s\n", + rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB)); + } } -static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB, unsigned rC) +static void emit_RRR(struct spe_function *p, unsigned op, int rT, + int rA, int rB, int rC, const char *name) { union spe_inst_RRR inst; inst.inst.op = op; @@ -165,155 +239,212 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rB = rB; inst.inst.rA = rA; inst.inst.rC = rC; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT), + reg_name(rA), reg_name(rB), reg_name(rC)); + } } -static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) +static void emit_RI7(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { union spe_inst_RI7 inst; inst.inst.op = op; inst.inst.i7 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); + } } -static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) +static void emit_RI8(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { union spe_inst_RI8 inst; inst.inst.op = op; inst.inst.i8 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); + } } -static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) +static void emit_RI10(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { union spe_inst_RI10 inst; inst.inst.op = op; inst.inst.i10 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); + } +} + + +/** As above, but do range checking on signed immediate value */ +static void emit_RI10s(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) +{ + assert(imm <= 511); + assert(imm >= -512); + emit_RI10(p, op, rT, rA, imm, name); } -static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, - int imm) +static void emit_RI16(struct spe_function *p, unsigned op, int rT, + int imm, const char *name) { union spe_inst_RI16 inst; inst.inst.op = op; inst.inst.i16 = imm; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); + } } -static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, - int imm) +static void emit_RI18(struct spe_function *p, unsigned op, int rT, + int imm, const char *name) { union spe_inst_RI18 inst; inst.inst.op = op; inst.inst.i18 = imm; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); + } } - +#define EMIT(_name, _op) \ +void _name (struct spe_function *p) \ +{ \ + emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \ +} #define EMIT_(_name, _op) \ -void _name (struct spe_function *p, unsigned rT) \ +void _name (struct spe_function *p, int rT) \ { \ - emit_RR(p, _op, rT, 0, 0); \ + emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \ } #define EMIT_R(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA) \ +void _name (struct spe_function *p, int rT, int rA) \ { \ - emit_RR(p, _op, rT, rA, 0); \ + emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \ } #define EMIT_RR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ +void _name (struct spe_function *p, int rT, int rA, int rB) \ { \ - emit_RR(p, _op, rT, rA, rB); \ + emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \ } #define EMIT_RRR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ +void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \ { \ - emit_RRR(p, _op, rT, rA, rB, rC); \ + emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \ } #define EMIT_RI7(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ - emit_RI7(p, _op, rT, rA, imm); \ + emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI8(_name, _op, bias) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ - emit_RI8(p, _op, rT, rA, bias - imm); \ + emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \ } #define EMIT_RI10(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ - emit_RI10(p, _op, rT, rA, imm); \ + emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ +} + +#define EMIT_RI10s(_name, _op) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ +{ \ + emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI16(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ +void _name (struct spe_function *p, int rT, int imm) \ { \ - emit_RI16(p, _op, rT, imm); \ + emit_RI16(p, _op, rT, imm, __FUNCTION__); \ } #define EMIT_RI18(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ +void _name (struct spe_function *p, int rT, int imm) \ { \ - emit_RI18(p, _op, rT, imm); \ + emit_RI18(p, _op, rT, imm, __FUNCTION__); \ } #define EMIT_I16(_name, _op) \ void _name (struct spe_function *p, int imm) \ { \ - emit_RI16(p, _op, 0, imm); \ + emit_RI16(p, _op, 0, imm, __FUNCTION__); \ } #include "rtasm_ppc_spe.h" + /** * Initialize an spe_function. - * \param code_size size of instruction buffer to allocate, in bytes. + * \param code_size initial size of instruction buffer to allocate, in bytes. + * If zero, use a default. */ void spe_init_func(struct spe_function *p, unsigned code_size) { - p->store = align_malloc(code_size, 16); + uint i; + + if (!code_size) + code_size = 64; + p->num_inst = 0; p->max_inst = code_size / SPE_INST_SIZE; + p->store = align_malloc(code_size, 16); + + p->set_count = 0; + memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. */ - p->regs[0] = ~7; - p->regs[1] = (1U << (80 - 64)) - 1; + p->regs[0] = p->regs[1] = p->regs[2] = 1; + for (i = 80; i <= 127; i++) { + p->regs[i] = 1; + } + + p->print = FALSE; + p->indent = 0; } @@ -327,20 +458,23 @@ void spe_release_func(struct spe_function *p) } +/** Return current code size in bytes. */ +unsigned spe_code_size(const struct spe_function *p) +{ + return p->num_inst * SPE_INST_SIZE; +} + + /** - * Alloate a SPE register. + * Allocate a SPE register. * \return register index or -1 if none left. */ int spe_allocate_available_register(struct spe_function *p) { unsigned i; for (i = 0; i < SPE_NUM_REGS; i++) { - const uint64_t mask = (1ULL << (i % 64)); - const unsigned idx = i / 64; - - assert(idx < 2); - if ((p->regs[idx] & mask) != 0) { - p->regs[idx] &= ~mask; + if (p->regs[i] == 0) { + p->regs[i] = 1; return i; } } @@ -354,31 +488,161 @@ int spe_allocate_available_register(struct spe_function *p) */ int spe_allocate_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) != 0); - - p->regs[idx] &= ~(1ULL << bit); + assert(p->regs[reg] == 0); + p->regs[reg] = 1; return reg; } /** - * Mark the given SPE register as "unallocated". + * Mark the given SPE register as "unallocated". Note that this should + * only be used on registers allocated in the current register set; an + * assertion will fail if an attempt is made to deallocate a register + * allocated in an earlier register set. */ void spe_release_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; + assert(reg >= 0); + assert(reg < SPE_NUM_REGS); + assert(p->regs[reg] == 1); - assert(idx < 2); + p->regs[reg] = 0; +} + +/** + * Start a new set of registers. This can be called if + * it will be difficult later to determine exactly what + * registers were actually allocated during a code generation + * sequence, and you really just want to deallocate all of them. + */ +void spe_allocate_register_set(struct spe_function *p) +{ + uint i; + + /* Keep track of the set count. If it ever wraps around to 0, + * we're in trouble. + */ + p->set_count++; + assert(p->set_count > 0); + + /* Increment the allocation count of all registers currently + * allocated. Then any registers that are allocated in this set + * will be the only ones with a count of 1; they'll all be released + * when the register set is released. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) + p->regs[i]++; + } +} + +void spe_release_register_set(struct spe_function *p) +{ + uint i; + + /* If the set count drops below zero, we're in trouble. */ + assert(p->set_count > 0); + p->set_count--; + + /* Drop the allocation level of all registers. Any allocated + * during this register set will drop to 0 and then become + * available. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) + p->regs[i]--; + } +} + + +unsigned +spe_get_registers_used(const struct spe_function *p, ubyte used[]) +{ + unsigned i, num = 0; + /* only count registers in the range available to callers */ + for (i = 2; i < 80; i++) { + if (p->regs[i]) { + used[num++] = i; + } + } + return num; +} + + +void +spe_print_code(struct spe_function *p, boolean enable) +{ + p->print = enable; +} - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) == 0); - p->regs[idx] |= (1ULL << bit); +void +spe_indent(struct spe_function *p, int spaces) +{ + p->indent += spaces; +} + + +void +spe_comment(struct spe_function *p, int rel_indent, const char *s) +{ + if (p->print) { + p->indent += rel_indent; + indent(p); + p->indent -= rel_indent; + printf("# %s\n", s); + } +} + + +/** + * Load quad word. + * NOTE: offset is in bytes and the least significant 4 bits must be zero! + */ +void spe_lqd(struct spe_function *p, int rT, int rA, int offset) +{ + const boolean pSave = p->print; + + /* offset must be a multiple of 16 */ + assert(offset % 16 == 0); + /* offset must fit in 10-bit signed int field, after shifting */ + assert((offset >> 4) <= 511); + assert((offset >> 4) >= -512); + + p->print = FALSE; + emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } +} + + +/** + * Store quad word. + * NOTE: offset is in bytes and the least significant 4 bits must be zero! + */ +void spe_stqd(struct spe_function *p, int rT, int rA, int offset) +{ + const boolean pSave = p->print; + + /* offset must be a multiple of 16 */ + assert(offset % 16 == 0); + /* offset must fit in 10-bit signed int field, after shifting */ + assert((offset >> 4) <= 511); + assert((offset >> 4) >= -512); + + p->print = FALSE; + emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } } @@ -390,53 +654,53 @@ void spe_release_register(struct spe_function *p, int reg) */ /** Branch Indirect to address in rA */ -void spe_bi(struct spe_function *p, unsigned rA, int d, int e) +void spe_bi(struct spe_function *p, int rA, int d, int e) { - emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Interupt Return */ -void spe_iret(struct spe_function *p, unsigned rA, int d, int e) +void spe_iret(struct spe_function *p, int rA, int d, int e) { - emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link on external data */ -void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, +void spe_bisled(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link. Save PC in rT, jump to rA. */ -void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, +void spe_bisl(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */ -void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_biz(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */ -void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_binz(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */ -void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */ -void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } @@ -454,7 +718,6 @@ hbrr; #if 0 stop; EMIT_RR (spe_stopd, 0x140); -EMIT_ (spe_lnop, 0x001); EMIT_ (spe_nop, 0x201); sync; EMIT_ (spe_dsync, 0x003); @@ -471,7 +734,7 @@ EMIT_R (spe_mtspr, 0x10c); void -spe_load_float(struct spe_function *p, unsigned rT, float x) +spe_load_float(struct spe_function *p, int rT, float x) { if (x == 0.0f) { spe_il(p, rT, 0x0); @@ -498,45 +761,307 @@ spe_load_float(struct spe_function *p, unsigned rT, float x) void -spe_load_int(struct spe_function *p, unsigned rT, int i) +spe_load_int(struct spe_function *p, int rT, int i) { if (-32768 <= i && i <= 32767) { spe_il(p, rT, i); } else { spe_ilhu(p, rT, i >> 16); - spe_iohl(p, rT, i & 0xffff); + if (i & 0xffff) + spe_iohl(p, rT, i & 0xffff); + } +} + +void spe_load_uint(struct spe_function *p, int rT, uint ui) +{ + /* If the whole value is in the lower 18 bits, use ila, which + * doesn't sign-extend. Otherwise, if the two halfwords of + * the constant are identical, use ilh. Otherwise, if every byte of + * the desired value is 0x00 or 0xff, we can use Form Select Mask for + * Bytes Immediate (fsmbi) to load the value in a single instruction. + * Otherwise, in the general case, we have to use ilhu followed by iohl. + */ + if ((ui & 0x0003ffff) == ui) { + spe_ila(p, rT, ui); + } + else if ((ui >> 16) == (ui & 0xffff)) { + spe_ilh(p, rT, ui & 0xffff); + } + else if ( + ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) && + ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) && + ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && + ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) + ) { + uint mask = 0; + /* fsmbi duplicates each bit in the given mask eight times, + * using a 16-bit value to initialize a 16-byte quadword. + * Each 4-bit nybble of the mask corresponds to a full word + * of the result; look at the value and figure out the mask + * (replicated for each word in the quadword), and then + * form the "select mask" to get the value. + */ + if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111; + if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222; + if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444; + if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888; + spe_fsmbi(p, rT, mask); + } + else { + /* The general case: this usually uses two instructions, but + * may use only one if the low-order 16 bits of each word are 0. + */ + spe_ilhu(p, rT, ui >> 16); + if (ui & 0xffff) + spe_iohl(p, rT, ui & 0xffff); + } +} + +/** + * This function is constructed identically to spe_xor_uint() below. + * Changes to one should be made in the other. + */ +void +spe_and_uint(struct spe_function *p, int rT, int rA, uint ui) +{ + /* If we can, emit a single instruction, either And Byte Immediate + * (which uses the same constant across each byte), And Halfword Immediate + * (which sign-extends a 10-bit immediate to 16 bits and uses that + * across each halfword), or And Word Immediate (which sign-extends + * a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + uint tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use And Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_andi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use And Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_andhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the And Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_andbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_and(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + + +/** + * This function is constructed identically to spe_and_uint() above. + * Changes to one should be made in the other. + */ +void +spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui) +{ + /* If we can, emit a single instruction, either Exclusive Or Byte + * Immediate (which uses the same constant across each byte), Exclusive + * Or Halfword Immediate (which sign-extends a 10-bit immediate to + * 16 bits and uses that across each halfword), or Exclusive Or Word + * Immediate (which sign-extends a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + uint tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use Exclusive Or Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_xori(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use Exclusive Or Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_xorhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the Exclusive Or Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_xorbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_xor(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + +void +spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui) +{ + /* If the comparison value is 9 bits or less, it fits inside a + * Compare Equal Word Immediate instruction. + */ + if ((ui & 0x000001ff) == ui) { + spe_ceqi(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_ceq(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); } } +void +spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui) +{ + /* If the comparison value is 10 bits or less, it fits inside a + * Compare Logical Greater Than Word Immediate instruction. + */ + if ((ui & 0x000003ff) == ui) { + spe_clgti(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_clgt(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} void -spe_splat(struct spe_function *p, unsigned rT, unsigned rA) +spe_splat(struct spe_function *p, int rT, int rA) { - spe_ila(p, rT, 66051); - spe_shufb(p, rT, rA, rA, rT); + /* Use a temporary, just in case rT == rA */ + int tmp_reg = spe_allocate_available_register(p); + /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ + spe_ila(p, tmp_reg, 0x00010203); + spe_shufb(p, rT, rA, rA, tmp_reg); + spe_release_register(p, tmp_reg); } void -spe_complement(struct spe_function *p, unsigned rT) +spe_complement(struct spe_function *p, int rT, int rA) { - spe_nor(p, rT, rT, rT); + spe_nor(p, rT, rA, rA); } void -spe_move(struct spe_function *p, unsigned rT, unsigned rA) +spe_move(struct spe_function *p, int rT, int rA) { - spe_ori(p, rT, rA, 0); + /* Use different instructions depending on the instruction address + * to take advantage of the dual pipelines. + */ + if (p->num_inst & 1) + spe_shlqbyi(p, rT, rA, 0); /* odd pipe */ + else + spe_ori(p, rT, rA, 0); /* even pipe */ } void -spe_zero(struct spe_function *p, unsigned rT) +spe_zero(struct spe_function *p, int rT) { spe_xor(p, rT, rT, rT); } +void +spe_splat_word(struct spe_function *p, int rT, int rA, int word) +{ + assert(word >= 0); + assert(word <= 3); + + if (word == 0) { + int tmp1 = rT; + spe_ila(p, tmp1, 66051); + spe_shufb(p, rT, rA, rA, tmp1); + } + else { + /* XXX review this, we may not need the rotqbyi instruction */ + int tmp1 = rT; + int tmp2 = spe_allocate_available_register(p); + + spe_ila(p, tmp1, 66051); + spe_rotqbyi(p, tmp2, rA, 4 * word); + spe_shufb(p, rT, tmp2, tmp2, tmp1); + + spe_release_register(p, tmp2); + } +} + +/** + * For each 32-bit float element of rA and rB, choose the smaller of the + * two, compositing them into the rT register. + * + * The Float Compare Greater Than (fcgt) instruction will put 1s into + * compare_reg where rA > rB, and 0s where rA <= rB. + * + * Then the Select Bits (selb) instruction will take bits from rA where + * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA + * where rA <= rB and from rB where rB > rA, which is exactly the + * "min" operation. + * + * The compare_reg could in many cases be the same as rT, unless + * rT == rA || rt == rB. But since this is common in constructions + * like "x = min(x, a)", we always allocate a new register to be safe. + */ +void +spe_float_min(struct spe_function *p, int rT, int rA, int rB) +{ + int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rA, rB, compare_reg); + spe_release_register(p, compare_reg); +} + +/** + * For each 32-bit float element of rA and rB, choose the greater of the + * two, compositing them into the rT register. + * + * The logic is similar to that of spe_float_min() above; the only + * difference is that the registers on spe_selb() have been reversed, + * so that the larger of the two is selected instead of the smaller. + */ +void +spe_float_max(struct spe_function *p, int rT, int rA, int rB) +{ + int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rB, rA, compare_reg); + spe_release_register(p, compare_reg); +} + #endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index d95e5aace3..65d9c77415 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -28,6 +28,7 @@ * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf * * \author Ian Romanick <idr@us.ibm.com> + * \author Brian Paul */ #ifndef RTASM_PPC_SPE_H @@ -39,10 +40,10 @@ /** number of general-purpose SIMD registers */ #define SPE_NUM_REGS 128 -/** Return Address register */ +/** Return Address register (aka $lr / Link Register) */ #define SPE_REG_RA 0 -/** Stack Pointer register */ +/** Stack Pointer register (aka $sp) */ #define SPE_REG_SP 1 @@ -52,308 +53,371 @@ struct spe_function uint num_inst; uint max_inst; - /** - * Mask of used / unused registers - * - * Each set bit corresponds to an available register. Each cleared bit - * corresponds to an allocated register. + /** + * The "set count" reflects the number of nested register sets + * are allowed. In the unlikely case that we exceed the set count, + * register allocation will start to be confused, which is critical + * enough that we check for it. + */ + unsigned char set_count; + + /** + * Flags for used and unused registers. Each byte corresponds to a + * register; a 0 in that byte means that the register is available. + * A value of 1 means that the register was allocated in the current + * register set. Any other value N means that the register was allocated + * N register sets ago. * * \sa * spe_allocate_register, spe_allocate_available_register, - * spe_release_register + * spe_allocate_register_set, spe_release_register_set, spe_release_register, */ - uint64_t regs[SPE_NUM_REGS / 64]; + unsigned char regs[SPE_NUM_REGS]; + + boolean print; /**< print/dump instructions as they're emitted? */ + int indent; /**< number of spaces to indent */ }; -extern void spe_init_func(struct spe_function *p, unsigned code_size); + +extern void spe_init_func(struct spe_function *p, uint code_size); extern void spe_release_func(struct spe_function *p); +extern uint spe_code_size(const struct spe_function *p); extern int spe_allocate_available_register(struct spe_function *p); extern int spe_allocate_register(struct spe_function *p, int reg); extern void spe_release_register(struct spe_function *p, int reg); +extern void spe_allocate_register_set(struct spe_function *p); +extern void spe_release_register_set(struct spe_function *p); + +extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]); + +extern void spe_print_code(struct spe_function *p, boolean enable); +extern void spe_indent(struct spe_function *p, int spaces); +extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); + #endif /* RTASM_PPC_SPE_H */ -#ifndef EMIT_ -#define EMIT_(name, _op) \ - extern void _name (struct spe_function *p, unsigned rT) +#ifndef EMIT +#define EMIT(_name, _op) \ + extern void _name (struct spe_function *p); +#define EMIT_(_name, _op) \ + extern void _name (struct spe_function *p, int rT); #define EMIT_R(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA) + extern void _name (struct spe_function *p, int rT, int rA); #define EMIT_RR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB) + extern void _name (struct spe_function *p, int rT, int rA, int rB); #define EMIT_RRR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB, unsigned rC) + extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC); #define EMIT_RI7(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI8(_name, _op, bias) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI10(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + extern void _name (struct spe_function *p, int rT, int rA, int imm); +#define EMIT_RI10s(_name, _op) \ + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI16(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) + extern void _name (struct spe_function *p, int rT, int imm); #define EMIT_RI18(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) + extern void _name (struct spe_function *p, int rT, int imm); #define EMIT_I16(_name, _op) \ - extern void _name (struct spe_function *p, int imm) + extern void _name (struct spe_function *p, int imm); #define UNDEF_EMIT_MACROS -#endif /* EMIT_ */ +#endif /* EMIT */ /* Memory load / store instructions */ -EMIT_RI10(spe_lqd, 0x034); -EMIT_RR (spe_lqx, 0x1c4); -EMIT_RI16(spe_lqa, 0x061); -EMIT_RI16(spe_lqr, 0x067); -EMIT_RI10(spe_stqd, 0x024); -EMIT_RR (spe_stqx, 0x144); -EMIT_RI16(spe_stqa, 0x041); -EMIT_RI16(spe_stqr, 0x047); -EMIT_RI7 (spe_cbd, 0x1f4); -EMIT_RR (spe_cbx, 0x1d4); -EMIT_RI7 (spe_chd, 0x1f5); -EMIT_RI7 (spe_chx, 0x1d5); -EMIT_RI7 (spe_cwd, 0x1f6); -EMIT_RI7 (spe_cwx, 0x1d6); -EMIT_RI7 (spe_cdd, 0x1f7); -EMIT_RI7 (spe_cdx, 0x1d7); +EMIT_RR (spe_lqx, 0x1c4) +EMIT_RI16(spe_lqa, 0x061) +EMIT_RI16(spe_lqr, 0x067) +EMIT_RR (spe_stqx, 0x144) +EMIT_RI16(spe_stqa, 0x041) +EMIT_RI16(spe_stqr, 0x047) +EMIT_RI7 (spe_cbd, 0x1f4) +EMIT_RR (spe_cbx, 0x1d4) +EMIT_RI7 (spe_chd, 0x1f5) +EMIT_RI7 (spe_chx, 0x1d5) +EMIT_RI7 (spe_cwd, 0x1f6) +EMIT_RI7 (spe_cwx, 0x1d6) +EMIT_RI7 (spe_cdd, 0x1f7) +EMIT_RI7 (spe_cdx, 0x1d7) /* Constant formation instructions */ -EMIT_RI16(spe_ilh, 0x083); -EMIT_RI16(spe_ilhu, 0x082); -EMIT_RI16(spe_il, 0x081); -EMIT_RI18(spe_ila, 0x021); -EMIT_RI16(spe_iohl, 0x0c1); -EMIT_RI16(spe_fsmbi, 0x065); +EMIT_RI16(spe_ilh, 0x083) +EMIT_RI16(spe_ilhu, 0x082) +EMIT_RI16(spe_il, 0x081) +EMIT_RI18(spe_ila, 0x021) +EMIT_RI16(spe_iohl, 0x0c1) +EMIT_RI16(spe_fsmbi, 0x065) /* Integer and logical instructions */ -EMIT_RR (spe_ah, 0x0c8); -EMIT_RI10(spe_ahi, 0x01d); -EMIT_RR (spe_a, 0x0c0); -EMIT_RI10(spe_ai, 0x01c); -EMIT_RR (spe_sfh, 0x048); -EMIT_RI10(spe_sfhi, 0x00d); -EMIT_RR (spe_sf, 0x040); -EMIT_RI10(spe_sfi, 0x00c); -EMIT_RR (spe_addx, 0x340); -EMIT_RR (spe_cg, 0x0c2); -EMIT_RR (spe_cgx, 0x342); -EMIT_RR (spe_sfx, 0x341); -EMIT_RR (spe_bg, 0x042); -EMIT_RR (spe_bgx, 0x343); -EMIT_RR (spe_mpy, 0x3c4); -EMIT_RR (spe_mpyu, 0x3cc); -EMIT_RI10(spe_mpyi, 0x074); -EMIT_RI10(spe_mpyui, 0x075); -EMIT_RRR (spe_mpya, 0x00c); -EMIT_RR (spe_mpyh, 0x3c5); -EMIT_RR (spe_mpys, 0x3c7); -EMIT_RR (spe_mpyhh, 0x3c6); -EMIT_RR (spe_mpyhha, 0x346); -EMIT_RR (spe_mpyhhu, 0x3ce); -EMIT_RR (spe_mpyhhau, 0x34e); -EMIT_R (spe_clz, 0x2a5); -EMIT_R (spe_cntb, 0x2b4); -EMIT_R (spe_fsmb, 0x1b6); -EMIT_R (spe_fsmh, 0x1b5); -EMIT_R (spe_fsm, 0x1b4); -EMIT_R (spe_gbb, 0x1b2); -EMIT_R (spe_gbh, 0x1b1); -EMIT_R (spe_gb, 0x1b0); -EMIT_RR (spe_avgb, 0x0d3); -EMIT_RR (spe_absdb, 0x053); -EMIT_RR (spe_sumb, 0x253); -EMIT_R (spe_xsbh, 0x2b6); -EMIT_R (spe_xshw, 0x2ae); -EMIT_R (spe_xswd, 0x2a6); -EMIT_RR (spe_and, 0x0c1); -EMIT_RR (spe_andc, 0x2c1); -EMIT_RI10(spe_andbi, 0x016); -EMIT_RI10(spe_andhi, 0x015); -EMIT_RI10(spe_andi, 0x014); -EMIT_RR (spe_or, 0x041); -EMIT_RR (spe_orc, 0x2c9); -EMIT_RI10(spe_orbi, 0x006); -EMIT_RI10(spe_orhi, 0x005); -EMIT_RI10(spe_ori, 0x004); -EMIT_R (spe_orx, 0x1f0); -EMIT_RR (spe_xor, 0x241); -EMIT_RI10(spe_xorbi, 0x026); -EMIT_RI10(spe_xorhi, 0x025); -EMIT_RI10(spe_xori, 0x024); -EMIT_RR (spe_nand, 0x0c9); -EMIT_RR (spe_nor, 0x049); -EMIT_RR (spe_eqv, 0x249); -EMIT_RRR (spe_selb, 0x008); -EMIT_RRR (spe_shufb, 0x00b); +EMIT_RR (spe_ah, 0x0c8) +EMIT_RI10(spe_ahi, 0x01d) +EMIT_RR (spe_a, 0x0c0) +EMIT_RI10s(spe_ai, 0x01c) +EMIT_RR (spe_sfh, 0x048) +EMIT_RI10(spe_sfhi, 0x00d) +EMIT_RR (spe_sf, 0x040) +EMIT_RI10(spe_sfi, 0x00c) +EMIT_RR (spe_addx, 0x340) +EMIT_RR (spe_cg, 0x0c2) +EMIT_RR (spe_cgx, 0x342) +EMIT_RR (spe_sfx, 0x341) +EMIT_RR (spe_bg, 0x042) +EMIT_RR (spe_bgx, 0x343) +EMIT_RR (spe_mpy, 0x3c4) +EMIT_RR (spe_mpyu, 0x3cc) +EMIT_RI10(spe_mpyi, 0x074) +EMIT_RI10(spe_mpyui, 0x075) +EMIT_RRR (spe_mpya, 0x00c) +EMIT_RR (spe_mpyh, 0x3c5) +EMIT_RR (spe_mpys, 0x3c7) +EMIT_RR (spe_mpyhh, 0x3c6) +EMIT_RR (spe_mpyhha, 0x346) +EMIT_RR (spe_mpyhhu, 0x3ce) +EMIT_RR (spe_mpyhhau, 0x34e) +EMIT_R (spe_clz, 0x2a5) +EMIT_R (spe_cntb, 0x2b4) +EMIT_R (spe_fsmb, 0x1b6) +EMIT_R (spe_fsmh, 0x1b5) +EMIT_R (spe_fsm, 0x1b4) +EMIT_R (spe_gbb, 0x1b2) +EMIT_R (spe_gbh, 0x1b1) +EMIT_R (spe_gb, 0x1b0) +EMIT_RR (spe_avgb, 0x0d3) +EMIT_RR (spe_absdb, 0x053) +EMIT_RR (spe_sumb, 0x253) +EMIT_R (spe_xsbh, 0x2b6) +EMIT_R (spe_xshw, 0x2ae) +EMIT_R (spe_xswd, 0x2a6) +EMIT_RR (spe_and, 0x0c1) +EMIT_RR (spe_andc, 0x2c1) +EMIT_RI10s(spe_andbi, 0x016) +EMIT_RI10s(spe_andhi, 0x015) +EMIT_RI10s(spe_andi, 0x014) +EMIT_RR (spe_or, 0x041) +EMIT_RR (spe_orc, 0x2c9) +EMIT_RI10s(spe_orbi, 0x006) +EMIT_RI10s(spe_orhi, 0x005) +EMIT_RI10s(spe_ori, 0x004) +EMIT_R (spe_orx, 0x1f0) +EMIT_RR (spe_xor, 0x241) +EMIT_RI10s(spe_xorbi, 0x046) +EMIT_RI10s(spe_xorhi, 0x045) +EMIT_RI10s(spe_xori, 0x044) +EMIT_RR (spe_nand, 0x0c9) +EMIT_RR (spe_nor, 0x049) +EMIT_RR (spe_eqv, 0x249) +EMIT_RRR (spe_selb, 0x008) +EMIT_RRR (spe_shufb, 0x00b) /* Shift and rotate instructions */ -EMIT_RR (spe_shlh, 0x05f); -EMIT_RI7 (spe_shlhi, 0x07f); -EMIT_RR (spe_shl, 0x05b); -EMIT_RI7 (spe_shli, 0x07b); -EMIT_RR (spe_shlqbi, 0x1db); -EMIT_RI7 (spe_shlqbii, 0x1fb); -EMIT_RR (spe_shlqby, 0x1df); -EMIT_RI7 (spe_shlqbyi, 0x1ff); -EMIT_RR (spe_shlqbybi, 0x1cf); -EMIT_RR (spe_roth, 0x05c); -EMIT_RI7 (spe_rothi, 0x07c); -EMIT_RR (spe_rot, 0x058); -EMIT_RI7 (spe_roti, 0x078); -EMIT_RR (spe_rotqby, 0x1dc); -EMIT_RI7 (spe_rotqbyi, 0x1fc); -EMIT_RR (spe_rotqbybi, 0x1cc); -EMIT_RR (spe_rotqbi, 0x1d8); -EMIT_RI7 (spe_rotqbii, 0x1f8); -EMIT_RR (spe_rothm, 0x05d); -EMIT_RI7 (spe_rothmi, 0x07d); -EMIT_RR (spe_rotm, 0x059); -EMIT_RI7 (spe_rotmi, 0x079); -EMIT_RR (spe_rotqmby, 0x1dd); -EMIT_RI7 (spe_rotqmbyi, 0x1fd); -EMIT_RR (spe_rotqmbybi, 0x1cd); -EMIT_RR (spe_rotqmbi, 0x1c9); -EMIT_RI7 (spe_rotqmbii, 0x1f9); -EMIT_RR (spe_rotmah, 0x05e); -EMIT_RI7 (spe_rotmahi, 0x07e); -EMIT_RR (spe_rotma, 0x05a); -EMIT_RI7 (spe_rotmai, 0x07a); +EMIT_RR (spe_shlh, 0x05f) +EMIT_RI7 (spe_shlhi, 0x07f) +EMIT_RR (spe_shl, 0x05b) +EMIT_RI7 (spe_shli, 0x07b) +EMIT_RR (spe_shlqbi, 0x1db) +EMIT_RI7 (spe_shlqbii, 0x1fb) +EMIT_RR (spe_shlqby, 0x1df) +EMIT_RI7 (spe_shlqbyi, 0x1ff) +EMIT_RR (spe_shlqbybi, 0x1cf) +EMIT_RR (spe_roth, 0x05c) +EMIT_RI7 (spe_rothi, 0x07c) +EMIT_RR (spe_rot, 0x058) +EMIT_RI7 (spe_roti, 0x078) +EMIT_RR (spe_rotqby, 0x1dc) +EMIT_RI7 (spe_rotqbyi, 0x1fc) +EMIT_RR (spe_rotqbybi, 0x1cc) +EMIT_RR (spe_rotqbi, 0x1d8) +EMIT_RI7 (spe_rotqbii, 0x1f8) +EMIT_RR (spe_rothm, 0x05d) +EMIT_RI7 (spe_rothmi, 0x07d) +EMIT_RR (spe_rotm, 0x059) +EMIT_RI7 (spe_rotmi, 0x079) +EMIT_RR (spe_rotqmby, 0x1dd) +EMIT_RI7 (spe_rotqmbyi, 0x1fd) +EMIT_RR (spe_rotqmbybi, 0x1cd) +EMIT_RR (spe_rotqmbi, 0x1c9) +EMIT_RI7 (spe_rotqmbii, 0x1f9) +EMIT_RR (spe_rotmah, 0x05e) +EMIT_RI7 (spe_rotmahi, 0x07e) +EMIT_RR (spe_rotma, 0x05a) +EMIT_RI7 (spe_rotmai, 0x07a) /* Compare, branch, and halt instructions */ -EMIT_RR (spe_heq, 0x3d8); -EMIT_RI10(spe_heqi, 0x07f); -EMIT_RR (spe_hgt, 0x258); -EMIT_RI10(spe_hgti, 0x04f); -EMIT_RR (spe_hlgt, 0x2d8); -EMIT_RI10(spe_hlgti, 0x05f); -EMIT_RR (spe_ceqb, 0x3d0); -EMIT_RI10(spe_ceqbi, 0x07e); -EMIT_RR (spe_ceqh, 0x3c8); -EMIT_RI10(spe_ceqhi, 0x07d); -EMIT_RR (spe_ceq, 0x3c0); -EMIT_RI10(spe_ceqi, 0x07c); -EMIT_RR (spe_cgtb, 0x250); -EMIT_RI10(spe_cgtbi, 0x04e); -EMIT_RR (spe_cgth, 0x248); -EMIT_RI10(spe_cgthi, 0x04d); -EMIT_RR (spe_cgt, 0x240); -EMIT_RI10(spe_cgti, 0x04c); -EMIT_RR (spe_clgtb, 0x2d0); -EMIT_RI10(spe_clgtbi, 0x05e); -EMIT_RR (spe_clgth, 0x2c8); -EMIT_RI10(spe_clgthi, 0x05d); -EMIT_RR (spe_clgt, 0x2c0); -EMIT_RI10(spe_clgti, 0x05c); -EMIT_I16 (spe_br, 0x064); -EMIT_I16 (spe_bra, 0x060); -EMIT_RI16(spe_brsl, 0x066); -EMIT_RI16(spe_brasl, 0x062); -EMIT_RI16(spe_brnz, 0x042); -EMIT_RI16(spe_brz, 0x040); -EMIT_RI16(spe_brhnz, 0x046); -EMIT_RI16(spe_brhz, 0x044); +EMIT_RR (spe_heq, 0x3d8) +EMIT_RI10(spe_heqi, 0x07f) +EMIT_RR (spe_hgt, 0x258) +EMIT_RI10(spe_hgti, 0x04f) +EMIT_RR (spe_hlgt, 0x2d8) +EMIT_RI10(spe_hlgti, 0x05f) +EMIT_RR (spe_ceqb, 0x3d0) +EMIT_RI10(spe_ceqbi, 0x07e) +EMIT_RR (spe_ceqh, 0x3c8) +EMIT_RI10(spe_ceqhi, 0x07d) +EMIT_RR (spe_ceq, 0x3c0) +EMIT_RI10(spe_ceqi, 0x07c) +EMIT_RR (spe_cgtb, 0x250) +EMIT_RI10(spe_cgtbi, 0x04e) +EMIT_RR (spe_cgth, 0x248) +EMIT_RI10(spe_cgthi, 0x04d) +EMIT_RR (spe_cgt, 0x240) +EMIT_RI10(spe_cgti, 0x04c) +EMIT_RR (spe_clgtb, 0x2d0) +EMIT_RI10(spe_clgtbi, 0x05e) +EMIT_RR (spe_clgth, 0x2c8) +EMIT_RI10(spe_clgthi, 0x05d) +EMIT_RR (spe_clgt, 0x2c0) +EMIT_RI10(spe_clgti, 0x05c) +EMIT_I16 (spe_br, 0x064) +EMIT_I16 (spe_bra, 0x060) +EMIT_RI16(spe_brsl, 0x066) +EMIT_RI16(spe_brasl, 0x062) +EMIT_RI16(spe_brnz, 0x042) +EMIT_RI16(spe_brz, 0x040) +EMIT_RI16(spe_brhnz, 0x046) +EMIT_RI16(spe_brhz, 0x044) -extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, +/* Control instructions + */ +EMIT (spe_lnop, 0x001) + +extern void +spe_lqd(struct spe_function *p, int rT, int rA, int offset); + +extern void +spe_stqd(struct spe_function *p, int rT, int rA, int offset); + +extern void spe_bi(struct spe_function *p, int rA, int d, int e); +extern void spe_iret(struct spe_function *p, int rA, int d, int e); +extern void spe_bisled(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bisl(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_biz(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_binz(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e); /** Load/splat immediate float into rT. */ extern void -spe_load_float(struct spe_function *p, unsigned rT, float x); +spe_load_float(struct spe_function *p, int rT, float x); /** Load/splat immediate int into rT. */ extern void -spe_load_int(struct spe_function *p, unsigned rT, int i); +spe_load_int(struct spe_function *p, int rT, int i); + +/** Load/splat immediate unsigned int into rT. */ +extern void +spe_load_uint(struct spe_function *p, int rT, uint ui); + +/** And immediate value into rT. */ +extern void +spe_and_uint(struct spe_function *p, int rT, int rA, uint ui); + +/** Xor immediate value into rT. */ +extern void +spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui); + +/** Compare equal with immediate value. */ +extern void +spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui); + +/** Compare greater with immediate value. */ +extern void +spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui); /** Replicate word 0 of rA across rT. */ extern void -spe_splat(struct spe_function *p, unsigned rT, unsigned rA); +spe_splat(struct spe_function *p, int rT, int rA); -/** Complement/invert all bits in rT. */ +/** rT = complement_all_bits(rA). */ extern void -spe_complement(struct spe_function *p, unsigned rT); +spe_complement(struct spe_function *p, int rT, int rA); /** rT = rA. */ extern void -spe_move(struct spe_function *p, unsigned rT, unsigned rA); +spe_move(struct spe_function *p, int rT, int rA); /** rT = {0,0,0,0}. */ extern void -spe_zero(struct spe_function *p, unsigned rT); +spe_zero(struct spe_function *p, int rT); + +/** rT = splat(rA, word) */ +extern void +spe_splat_word(struct spe_function *p, int rT, int rA, int word); + +/** rT = float min(rA, rB) */ +extern void +spe_float_min(struct spe_function *p, int rT, int rA, int rB); + +/** rT = float max(rA, rB) */ +extern void +spe_float_max(struct spe_function *p, int rT, int rA, int rB); /* Floating-point instructions */ -EMIT_RR (spe_fa, 0x2c4); -EMIT_RR (spe_dfa, 0x2cc); -EMIT_RR (spe_fs, 0x2c5); -EMIT_RR (spe_dfs, 0x2cd); -EMIT_RR (spe_fm, 0x2c6); -EMIT_RR (spe_dfm, 0x2ce); -EMIT_RRR (spe_fma, 0x00e); -EMIT_RR (spe_dfma, 0x35c); -EMIT_RRR (spe_fnms, 0x00d); -EMIT_RR (spe_dfnms, 0x35e); -EMIT_RRR (spe_fms, 0x00f); -EMIT_RR (spe_dfms, 0x35d); -EMIT_RR (spe_dfnma, 0x35f); -EMIT_R (spe_frest, 0x1b8); -EMIT_R (spe_frsqest, 0x1b9); -EMIT_RR (spe_fi, 0x3d4); -EMIT_RI8 (spe_csflt, 0x1da, 155); -EMIT_RI8 (spe_cflts, 0x1d8, 173); -EMIT_RI8 (spe_cuflt, 0x1db, 155); -EMIT_RI8 (spe_cfltu, 0x1d9, 173); -EMIT_R (spe_frds, 0x3b9); -EMIT_R (spe_fesd, 0x3b8); -EMIT_RR (spe_dfceq, 0x3c3); -EMIT_RR (spe_dfcmeq, 0x3cb); -EMIT_RR (spe_dfcgt, 0x2c3); -EMIT_RR (spe_dfcmgt, 0x2cb); -EMIT_RI7 (spe_dftsv, 0x3bf); -EMIT_RR (spe_fceq, 0x3c2); -EMIT_RR (spe_fcmeq, 0x3ca); -EMIT_RR (spe_fcgt, 0x2c2); -EMIT_RR (spe_fcmgt, 0x2ca); -EMIT_R (spe_fscrwr, 0x3ba); -EMIT_ (spe_fscrrd, 0x398); +EMIT_RR (spe_fa, 0x2c4) +EMIT_RR (spe_dfa, 0x2cc) +EMIT_RR (spe_fs, 0x2c5) +EMIT_RR (spe_dfs, 0x2cd) +EMIT_RR (spe_fm, 0x2c6) +EMIT_RR (spe_dfm, 0x2ce) +EMIT_RRR (spe_fma, 0x00e) +EMIT_RR (spe_dfma, 0x35c) +EMIT_RRR (spe_fnms, 0x00d) +EMIT_RR (spe_dfnms, 0x35e) +EMIT_RRR (spe_fms, 0x00f) +EMIT_RR (spe_dfms, 0x35d) +EMIT_RR (spe_dfnma, 0x35f) +EMIT_R (spe_frest, 0x1b8) +EMIT_R (spe_frsqest, 0x1b9) +EMIT_RR (spe_fi, 0x3d4) +EMIT_RI8 (spe_csflt, 0x1da, 155) +EMIT_RI8 (spe_cflts, 0x1d8, 173) +EMIT_RI8 (spe_cuflt, 0x1db, 155) +EMIT_RI8 (spe_cfltu, 0x1d9, 173) +EMIT_R (spe_frds, 0x3b9) +EMIT_R (spe_fesd, 0x3b8) +EMIT_RR (spe_dfceq, 0x3c3) +EMIT_RR (spe_dfcmeq, 0x3cb) +EMIT_RR (spe_dfcgt, 0x2c3) +EMIT_RR (spe_dfcmgt, 0x2cb) +EMIT_RI7 (spe_dftsv, 0x3bf) +EMIT_RR (spe_fceq, 0x3c2) +EMIT_RR (spe_fcmeq, 0x3ca) +EMIT_RR (spe_fcgt, 0x2c2) +EMIT_RR (spe_fcmgt, 0x2ca) +EMIT_R (spe_fscrwr, 0x3ba) +EMIT_ (spe_fscrrd, 0x398) /* Channel instructions */ -EMIT_R (spe_rdch, 0x00d); -EMIT_R (spe_rdchcnt, 0x00f); -EMIT_R (spe_wrch, 0x10d); +EMIT_R (spe_rdch, 0x00d) +EMIT_R (spe_rdchcnt, 0x00f) +EMIT_R (spe_wrch, 0x10d) #ifdef UNDEF_EMIT_MACROS +#undef EMIT #undef EMIT_ #undef EMIT_R #undef EMIT_RR @@ -361,6 +425,7 @@ EMIT_R (spe_wrch, 0x10d); #undef EMIT_RI7 #undef EMIT_RI8 #undef EMIT_RI10 +#undef EMIT_RI10s #undef EMIT_RI16 #undef EMIT_RI18 #undef EMIT_I16 diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index ad9d8f8ced..57fcf6de2a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -26,7 +26,7 @@ #if defined(PIPE_ARCH_X86) #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_pointer.h" #include "rtasm_execmem.h" @@ -240,7 +240,8 @@ static void emit_modrm( struct x86_function *p, /* Oh-oh we've stumbled into the SIB thing. */ if (regmem.file == file_REG32 && - regmem.idx == reg_SP) { + regmem.idx == reg_SP && + regmem.mod != mod_REG) { emit_1ub(p, 0x24); /* simplistic! */ } @@ -439,25 +440,70 @@ void x86_call( struct x86_function *p, struct x86_reg reg) } -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); } -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ) +/** + * Immediate group 1 instructions. + */ +static INLINE void +x86_group1_imm( struct x86_function *p, + unsigned op, struct x86_reg dst, int imm ) { - DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); - emit_1ub(p, 0x80); - emit_modrm_noreg(p, 0, dst); - emit_1ub(p, imm); + if(-0x80 <= imm && imm < 0x80) { + emit_1ub(p, 0x83); + emit_modrm_noreg(p, op, dst); + emit_1b(p, (char)imm); + } + else { + emit_1ub(p, 0x81); + emit_modrm_noreg(p, op, dst); + emit_1i(p, imm); + } +} + +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 0, dst, imm); +} + +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 1, dst, imm); +} + +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 4, dst, imm); +} + +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 5, dst, imm); +} + +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 6, dst, imm); +} + +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 7, dst, imm); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index af79f07dd3..1b5eaaca85 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -152,12 +152,13 @@ void x86_jmp( struct x86_function *p, int label ); /* void x86_call( struct x86_function *p, void (*label)() ); */ void x86_call( struct x86_function *p, struct x86_reg reg); -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ); +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); /* Macro for sse_shufps() and sse2_pshufd(): diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index c7155a9316..d7df9490cf 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ tgsi_info.c \ tgsi_iterate.c \ tgsi_parse.c \ + tgsi_ppc.c \ tgsi_scan.c \ tgsi_sse2.c \ tgsi_text.c \ diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 45bf3f6d57..8200cce42f 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -12,6 +12,7 @@ tgsi = env.ConvenienceLibrary( 'tgsi_parse.c', 'tgsi_sanity.c', 'tgsi_scan.c', + 'tgsi_ppc.c', 'tgsi_sse2.c', 'tgsi_text.c', 'tgsi_transform.c', diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index ed8fc5ac25..a1891a140a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" @@ -114,7 +114,7 @@ tgsi_default_declaration( void ) struct tgsi_declaration declaration; declaration.Type = TGSI_TOKEN_TYPE_DECLARATION; - declaration.Size = 1; + declaration.NrTokens = 1; declaration.File = TGSI_FILE_NULL; declaration.UsageMask = TGSI_WRITEMASK_XYZW; declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; @@ -160,9 +160,9 @@ declaration_grow( struct tgsi_declaration *declaration, struct tgsi_header *header ) { - assert( declaration->Size < 0xFF ); + assert( declaration->NrTokens < 0xFF ); - declaration->Size++; + declaration->NrTokens++; header_bodysize_grow( header ); } @@ -308,7 +308,7 @@ tgsi_default_immediate( void ) struct tgsi_immediate immediate; immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE; - immediate.Size = 1; + immediate.NrTokens = 1; immediate.DataType = TGSI_IMM_FLOAT32; immediate.Padding = 0; immediate.Extended = 0; @@ -345,9 +345,9 @@ immediate_grow( struct tgsi_immediate *immediate, struct tgsi_header *header ) { - assert( immediate->Size < 0xFF ); + assert( immediate->NrTokens < 0xFF ); - immediate->Size++; + immediate->NrTokens++; header_bodysize_grow( header ); } @@ -384,7 +384,7 @@ tgsi_build_full_immediate( *immediate = tgsi_build_immediate( header ); - for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) { + for( i = 0; i < full_imm->Immediate.NrTokens - 1; i++ ) { struct tgsi_immediate_float32 *if32; if( maxsize <= size ) @@ -411,7 +411,7 @@ tgsi_default_instruction( void ) struct tgsi_instruction instruction; instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - instruction.Size = 1; + instruction.NrTokens = 1; instruction.Opcode = TGSI_OPCODE_MOV; instruction.Saturate = TGSI_SAT_NONE; instruction.NumDstRegs = 1; @@ -453,9 +453,9 @@ instruction_grow( struct tgsi_instruction *instruction, struct tgsi_header *header ) { - assert (instruction->Size < 0xFF); + assert (instruction->NrTokens < 0xFF); - instruction->Size++; + instruction->NrTokens++; header_bodysize_grow( header ); } @@ -801,10 +801,14 @@ tgsi_default_instruction_ext_nv( void ) return instruction_ext_nv; } -union token_u32 + +/** test for inequality of 32-bit values pointed to by a and b */ +static INLINE boolean +compare32(const void *a, const void *b) { - unsigned u32; -}; + return *((uint32_t *) a) != *((uint32_t *) b); +} + unsigned tgsi_compare_instruction_ext_nv( @@ -813,7 +817,7 @@ tgsi_compare_instruction_ext_nv( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_instruction_ext_nv @@ -872,7 +876,7 @@ tgsi_compare_instruction_ext_label( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_instruction_ext_label @@ -913,7 +917,7 @@ tgsi_compare_instruction_ext_texture( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_instruction_ext_texture @@ -1035,7 +1039,7 @@ tgsi_compare_src_register_ext_swz( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_src_register_ext_swz @@ -1103,7 +1107,7 @@ tgsi_compare_src_register_ext_mod( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_src_register_ext_mod @@ -1249,7 +1253,7 @@ tgsi_compare_dst_register_ext_concode( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_dst_register_ext_concode @@ -1307,7 +1311,7 @@ tgsi_compare_dst_register_ext_modulate( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_dst_register_ext_modulate diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 2ed8c2bf07..d57cb9139f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_string.h" #include "tgsi_dump.h" #include "tgsi_info.h" @@ -285,7 +285,7 @@ iter_immediate( ENM( imm->Immediate.DataType, immediate_type_names ); TXT( " { " ); - for (i = 0; i < imm->Immediate.Size - 1; i++) { + for (i = 0; i < imm->Immediate.NrTokens - 1; i++) { switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: FLT( imm->u.ImmediateFloat32[i].Float ); @@ -294,7 +294,7 @@ iter_immediate( assert( 0 ); } - if (i < imm->Immediate.Size - 2) + if (i < imm->Immediate.NrTokens - 2) TXT( ", " ); } TXT( " }" ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index be25cb45a0..3dc61c48ca 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_string.h" #include "tgsi_dump_c.h" #include "tgsi_build.h" @@ -283,7 +283,7 @@ dump_immediate_verbose( UIX( imm->Immediate.Padding ); } - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) { EOL(); switch( imm->Immediate.DataType ) { case TGSI_IMM_FLOAT32: @@ -646,7 +646,6 @@ tgsi_dump_c( struct tgsi_full_declaration fd; uint ignored = flags & TGSI_DUMP_C_IGNORED; uint deflt = flags & TGSI_DUMP_C_DEFAULT; - uint instno = 0; tgsi_parse_init( &parse, tokens ); @@ -676,7 +675,7 @@ tgsi_dump_c( ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); if( ignored ) { TXT( "\nSize : " ); - UID( parse.FullToken.Token.Size ); + UID( parse.FullToken.Token.NrTokens ); if( deflt || parse.FullToken.Token.Extended ) { TXT( "\nExtended : " ); UID( parse.FullToken.Token.Extended ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index f98b66dc0b..5c5d8d2550 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -133,7 +133,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, uint numSamplers, - struct tgsi_sampler *samplers) + struct tgsi_sampler **samplers) { uint k; struct tgsi_parse_context parse; @@ -202,7 +202,7 @@ tgsi_exec_machine_bind_shader( case TGSI_TOKEN_TYPE_IMMEDIATE: { - uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; assert( size % 4 == 0 ); assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); @@ -320,6 +320,7 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } +#if 0 static void micro_iadd( union tgsi_exec_channel *dst, @@ -331,6 +332,7 @@ micro_iadd( dst->i[2] = src0->i[2] + src1->i[2]; dst->i[3] = src0->i[3] + src1->i[3]; } +#endif static void micro_and( @@ -408,6 +410,7 @@ micro_div( } } +#if 0 static void micro_udiv( union tgsi_exec_channel *dst, @@ -419,6 +422,7 @@ micro_udiv( dst->u[2] = src0->u[2] / src1->u[2]; dst->u[3] = src0->u[3] / src1->u[3]; } +#endif static void micro_eq( @@ -434,6 +438,7 @@ micro_eq( dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; } +#if 0 static void micro_ieq( union tgsi_exec_channel *dst, @@ -447,6 +452,7 @@ micro_ieq( dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; } +#endif static void micro_exp2( @@ -466,6 +472,7 @@ micro_exp2( #endif } +#if 0 static void micro_f2ut( union tgsi_exec_channel *dst, @@ -476,6 +483,7 @@ micro_f2ut( dst->u[2] = (uint) src->f[2]; dst->u[3] = (uint) src->f[3]; } +#endif static void micro_flr( @@ -570,6 +578,7 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } +#if 0 static void micro_ilt( union tgsi_exec_channel *dst, @@ -583,7 +592,9 @@ micro_ilt( dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; } +#endif +#if 0 static void micro_ult( union tgsi_exec_channel *dst, @@ -597,6 +608,7 @@ micro_ult( dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; } +#endif static void micro_max( @@ -610,6 +622,7 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } +#if 0 static void micro_imax( union tgsi_exec_channel *dst, @@ -621,7 +634,9 @@ micro_imax( dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; } +#endif +#if 0 static void micro_umax( union tgsi_exec_channel *dst, @@ -633,6 +648,7 @@ micro_umax( dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; } +#endif static void micro_min( @@ -646,6 +662,7 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } +#if 0 static void micro_imin( union tgsi_exec_channel *dst, @@ -657,7 +674,9 @@ micro_imin( dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; } +#endif +#if 0 static void micro_umin( union tgsi_exec_channel *dst, @@ -669,7 +688,9 @@ micro_umin( dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; } +#endif +#if 0 static void micro_umod( union tgsi_exec_channel *dst, @@ -681,6 +702,7 @@ micro_umod( dst->u[2] = src0->u[2] % src1->u[2]; dst->u[3] = src0->u[3] % src1->u[3]; } +#endif static void micro_mul( @@ -694,6 +716,7 @@ micro_mul( dst->f[3] = src0->f[3] * src1->f[3]; } +#if 0 static void micro_imul( union tgsi_exec_channel *dst, @@ -705,7 +728,9 @@ micro_imul( dst->i[2] = src0->i[2] * src1->i[2]; dst->i[3] = src0->i[3] * src1->i[3]; } +#endif +#if 0 static void micro_imul64( union tgsi_exec_channel *dst0, @@ -722,7 +747,9 @@ micro_imul64( dst0->i[2] = 0; dst0->i[3] = 0; } +#endif +#if 0 static void micro_umul64( union tgsi_exec_channel *dst0, @@ -739,7 +766,10 @@ micro_umul64( dst0->u[2] = 0; dst0->u[3] = 0; } +#endif + +#if 0 static void micro_movc( union tgsi_exec_channel *dst, @@ -752,6 +782,7 @@ micro_movc( dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; } +#endif static void micro_neg( @@ -764,6 +795,7 @@ micro_neg( dst->f[3] = -src->f[3]; } +#if 0 static void micro_ineg( union tgsi_exec_channel *dst, @@ -774,6 +806,7 @@ micro_ineg( dst->i[2] = -src->i[2]; dst->i[3] = -src->i[3]; } +#endif static void micro_not( @@ -874,6 +907,7 @@ micro_trunc( dst->f[3] = (float) (int) src0->f[3]; } +#if 0 static void micro_ushr( union tgsi_exec_channel *dst, @@ -885,6 +919,7 @@ micro_ushr( dst->u[2] = src0->u[2] >> src1->u[2]; dst->u[3] = src0->u[3] >> src1->u[3]; } +#endif static void micro_sin( @@ -919,6 +954,7 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } +#if 0 static void micro_u2f( union tgsi_exec_channel *dst, @@ -929,6 +965,7 @@ micro_u2f( dst->f[2] = (float) src->u[2]; dst->f[3] = (float) src->u[3]; } +#endif static void micro_xor( @@ -1045,11 +1082,28 @@ fetch_source( union tgsi_exec_channel index; uint swizzle; + /* We start with a direct index into a register file. + * + * file[1], + * where: + * file = SrcRegister.File + * [1] = SrcRegister.Index + */ index.i[0] = index.i[1] = index.i[2] = index.i[3] = reg->SrcRegister.Index; + /* There is an extra source register that indirectly subscripts + * a register file. The direct index now becomes an offset + * that is being added to the indirect register. + * + * file[ind[2].x+1], + * where: + * ind = SrcRegisterInd.File + * [2] = SrcRegisterInd.Index + * .x = SrcRegisterInd.SwizzleX + */ if (reg->SrcRegister.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1086,19 +1140,31 @@ fetch_source( } } - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. + * + * file[1][3] == file[1*sizeof(file[1])+3], + * where: + * [3] = SrcRegisterDim.Index + */ + if (reg->SrcRegister.Dimension) { + /* The size of the first-order array depends on the register file type. + * We need to multiply the index to the first array to get an effective, + * "flat" index that points to the beginning of the second-order array. + */ + switch (reg->SrcRegister.File) { case TGSI_FILE_INPUT: - index.i[0] *= 17; - index.i[1] *= 17; - index.i[2] *= 17; - index.i[3] *= 17; + index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; break; case TGSI_FILE_CONSTANT: - index.i[0] *= 4096; - index.i[1] *= 4096; - index.i[2] *= 4096; - index.i[3] *= 4096; + index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; break; default: assert( 0 ); @@ -1109,6 +1175,17 @@ fetch_source( index.i[2] += reg->SrcRegisterDim.Index; index.i[3] += reg->SrcRegisterDim.Index; + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[1][ind[4].y+3], + * where: + * ind = SrcRegisterDimInd.File + * [4] = SrcRegisterDimInd.Index + * .y = SrcRegisterDimInd.SwizzleX + */ if (reg->SrcRegisterDim.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1141,6 +1218,11 @@ fetch_source( index.i[i] = 0; } } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether SrcRegisterDim is followed + * by a dimension register and continue the saga. + */ } swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); @@ -1490,7 +1572,7 @@ exec_kilp(struct tgsi_exec_machine *mach, /* - * Fetch a texel using STR texture coordinates. + * Fetch a four texture samples using STR texture coordinates. */ static void fetch_texel( struct tgsi_sampler *sampler, @@ -1524,7 +1606,7 @@ exec_tex(struct tgsi_exec_machine *mach, boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; - union tgsi_exec_channel r[8]; + union tgsi_exec_channel r[4]; uint chan_index; float lodBias; @@ -1547,7 +1629,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -1573,7 +1655,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, /* inputs */ &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -1599,7 +1681,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, &r[0], &r[1], &r[2], &r[3]); break; @@ -1709,6 +1791,7 @@ exec_declaration( break; default: + eval = NULL; assert( 0 ); } @@ -1751,7 +1834,7 @@ exec_instruction( case TGSI_OPCODE_ARL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); + micro_flr( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; @@ -1806,6 +1889,7 @@ exec_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( &r[0], 0, CHAN_X ); + micro_abs( &r[0], &r[0] ); micro_sqrt( &r[0], &r[0] ); micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fc40a25e09..4ffd4efbff 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -68,17 +68,12 @@ struct tgsi_interp_coef float dady[NUM_CHANNELS]; }; - -struct softpipe_tile_cache; /**< Opaque to TGSI */ - /** * Information for sampling textures, which must be implemented * by code outside the TGSI executor. */ struct tgsi_sampler { - const struct pipe_sampler_state *state; - struct pipe_texture *texture; /** Get samples for four fragments in a quad */ void (*get_samples)(struct tgsi_sampler *sampler, const float s[QUAD_SIZE], @@ -86,8 +81,6 @@ struct tgsi_sampler const float p[QUAD_SIZE], float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]); - void *pipe; /*XXX temporary*/ - struct softpipe_tile_cache *cache; }; /** @@ -178,6 +171,16 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_LOOP_NESTING 20 #define TGSI_EXEC_MAX_CALL_NESTING 20 +/* The maximum number of input attributes per vertex. For 2D + * input register files, this is the stride between two 1D + * arrays. + */ +#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17 + +/* The maximum number of constant vectors per constant buffer. + */ +#define TGSI_EXEC_MAX_CONST_BUFFER 4096 + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -195,7 +198,7 @@ struct tgsi_exec_machine struct tgsi_exec_vector *Temps; struct tgsi_exec_vector *Addrs; - struct tgsi_sampler *Samplers; + struct tgsi_sampler **Samplers; float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; unsigned ImmLimit; @@ -258,7 +261,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, uint numSamplers, - struct tgsi_sampler *samplers); + struct tgsi_sampler **samplers); uint tgsi_exec_machine_run( diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 68c7a6b7f5..2b8a6f0fb1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_info.h" static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c index 5371a88b96..d88c2558d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_iterate.c +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_iterate.h" boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 3757486ba9..22006edf3d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" @@ -88,16 +88,33 @@ tgsi_parse_end_of_tokens( 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; } + +/** + * This function is used to avoid and work-around type punning/aliasing + * warnings. The warnings seem harmless on x86 but on PPC they cause + * real failures. + */ +static INLINE void +copy_token(void *dst, const void *src) +{ + memcpy(dst, src, 4); +} + + +/** + * Get next 4-byte token, return it at address specified by 'token' + */ static void next_token( struct tgsi_parse_context *ctx, void *token ) { assert( !tgsi_parse_end_of_tokens( ctx ) ); - - *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++]; + copy_token(token, &ctx->Tokens[ctx->Position]); + ctx->Position++; } + void tgsi_parse_token( struct tgsi_parse_context *ctx ) @@ -116,7 +133,7 @@ tgsi_parse_token( struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; *decl = tgsi_default_full_declaration(); - decl->Declaration = *(struct tgsi_declaration *) &token; + copy_token(&decl->Declaration, &token); next_token( ctx, &decl->DeclarationRange ); @@ -132,15 +149,14 @@ tgsi_parse_token( struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; *imm = tgsi_default_full_immediate(); - imm->Immediate = *(struct tgsi_immediate *) &token; - + copy_token(&imm->Immediate, &token); assert( !imm->Immediate.Extended ); switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: imm->u.Pointer = MALLOC( - sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.NrTokens - 1) ); + for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) { next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); } break; @@ -158,8 +174,7 @@ tgsi_parse_token( unsigned extended; *inst = tgsi_default_full_instruction(); - inst->Instruction = *(struct tgsi_instruction *) &token; - + copy_token(&inst->Instruction, &token); extended = inst->Instruction.Extended; while( extended ) { @@ -169,18 +184,15 @@ tgsi_parse_token( switch( token.Type ) { case TGSI_INSTRUCTION_EXT_TYPE_NV: - inst->InstructionExtNv = - *(struct tgsi_instruction_ext_nv *) &token; + copy_token(&inst->InstructionExtNv, &token); break; case TGSI_INSTRUCTION_EXT_TYPE_LABEL: - inst->InstructionExtLabel = - *(struct tgsi_instruction_ext_label *) &token; + copy_token(&inst->InstructionExtLabel, &token); break; case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: - inst->InstructionExtTexture = - *(struct tgsi_instruction_ext_texture *) &token; + copy_token(&inst->InstructionExtTexture, &token); break; default: @@ -212,13 +224,13 @@ tgsi_parse_token( switch( token.Type ) { case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: - inst->FullDstRegisters[i].DstRegisterExtConcode = - *(struct tgsi_dst_register_ext_concode *) &token; + copy_token(&inst->FullDstRegisters[i].DstRegisterExtConcode, + &token); break; case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: - inst->FullDstRegisters[i].DstRegisterExtModulate = - *(struct tgsi_dst_register_ext_modulate *) &token; + copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate, + &token); break; default: @@ -245,13 +257,13 @@ tgsi_parse_token( switch( token.Type ) { case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: - inst->FullSrcRegisters[i].SrcRegisterExtSwz = - *(struct tgsi_src_register_ext_swz *) &token; + copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz, + &token); break; case TGSI_SRC_REGISTER_EXT_TYPE_MOD: - inst->FullSrcRegisters[i].SrcRegisterExtMod = - *(struct tgsi_src_register_ext_mod *) &token; + copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod, + &token); break; default: diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c new file mode 100644 index 0000000000..0c64ae5713 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -0,0 +1,1363 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI to PowerPC code generation. + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_PPC) + +#include "util/u_debug.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_sse.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_dump.h" +#include "tgsi_exec.h" +#include "tgsi_ppc.h" +#include "rtasm/rtasm_ppc.h" + + +/** + * Since it's pretty much impossible to form PPC vector immediates, load + * them from memory here: + */ +const float ppc_builtin_constants[] ALIGN16_ATTRIB = { + 1.0f, -128.0f, 128.0, 0.0 +}; + + +#define FOR_EACH_CHANNEL( CHAN )\ + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) + +#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ + FOR_EACH_CHANNEL( CHAN )\ + IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + +/** + * How many TGSI temps should be implemented with real PPC vector registers + * rather than memory. + */ +#define MAX_PPC_TEMPS 3 + + +/** + * Context/state used during code gen. + */ +struct gen_context +{ + struct ppc_function *f; + int inputs_reg; /**< GP register pointing to input params */ + int outputs_reg; /**< GP register pointing to output params */ + int temps_reg; /**< GP register pointing to temporary "registers" */ + int immed_reg; /**< GP register pointing to immediates buffer */ + int const_reg; /**< GP register pointing to constants buffer */ + int builtins_reg; /**< GP register pointint to built-in constants */ + + int offset_reg; /**< used to reduce redundant li instructions */ + int offset_value; + + int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ + int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ + + /** + * Map TGSI temps to PPC vector temps. + * We have 32 PPC vector regs. Use 16 of them for storing 4 TGSI temps. + * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1]. + */ + int temps_map[MAX_PPC_TEMPS][4]; + + /** + * Cache of src registers. + * This is used to avoid redundant load instructions. + */ + struct { + struct tgsi_full_src_register src; + uint chan; + uint vec; + } regs[12]; /* 3 src regs, 4 channels */ + uint num_regs; +}; + + +/** + * Initialize code generation context. + */ +static void +init_gen_context(struct gen_context *gen, struct ppc_function *func) +{ + uint i; + + memset(gen, 0, sizeof(*gen)); + gen->f = func; + gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */ + gen->outputs_reg = ppc_reserve_register(func, 4); /* second function param */ + gen->temps_reg = ppc_reserve_register(func, 5); /* ... */ + gen->immed_reg = ppc_reserve_register(func, 6); + gen->const_reg = ppc_reserve_register(func, 7); + gen->builtins_reg = ppc_reserve_register(func, 8); + gen->one_vec = -1; + gen->bit31_vec = -1; + gen->offset_reg = -1; + gen->offset_value = -9999999; + for (i = 0; i < MAX_PPC_TEMPS; i++) { + gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f); + gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f); + gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f); + gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f); + } +} + + +/** + * Is the given TGSI register stored as a real PPC vector register? + */ +static boolean +is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) +{ + return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && + reg->SrcRegister.Index < MAX_PPC_TEMPS); +} + + +/** + * Is the given TGSI register stored as a real PPC vector register? + */ +static boolean +is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) +{ + return (reg->DstRegister.File == TGSI_FILE_TEMPORARY && + reg->DstRegister.Index < MAX_PPC_TEMPS); +} + + + +/** + * All PPC vector load/store instructions form an effective address + * by adding the contents of two registers. For example: + * lvx v2,r8,r9 # v2 = memory[r8 + r9] + * stvx v2,r8,r9 # memory[r8 + r9] = v2; + * So our lvx/stvx instructions are typically preceded by an 'li' instruction + * to load r9 (above) with an immediate (an offset). + * This code emits that 'li' instruction, but only if the offset value is + * different than the previous 'li'. + * This optimization seems to save about 10% in the instruction count. + * Note that we need to unconditionally emit an 'li' inside basic blocks + * (such as inside loops). + */ +static int +emit_li_offset(struct gen_context *gen, int offset) +{ + if (gen->offset_reg <= 0) { + /* allocate a GP register for storing load/store offset */ + gen->offset_reg = ppc_allocate_register(gen->f); + } + + /* emit new 'li' if offset is changing */ + if (gen->offset_value < 0 || gen->offset_value != offset) { + gen->offset_value = offset; + ppc_li(gen->f, gen->offset_reg, offset); + } + + return gen->offset_reg; +} + + +/** + * Forces subsequent emit_li_offset() calls to emit an 'li'. + * To be called at the top of basic blocks. + */ +static void +reset_li_offset(struct gen_context *gen) +{ + gen->offset_value = -9999999; +} + + + +/** + * Load the given vector register with {value, value, value, value}. + * The value must be in the ppu_builtin_constants[] array. + * We wouldn't need this if there was a simple way to load PPC vector + * registers with immediate values! + */ +static void +load_constant_vec(struct gen_context *gen, int dst_vec, float value) +{ + uint pos; + for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { + if (ppc_builtin_constants[pos] == value) { + int offset = pos * 4; + int offset_reg = emit_li_offset(gen, offset); + + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our builtins start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); + /* splat word[pos % 4] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); + return; + } + } + assert(0 && "Need to add new constant to ppc_builtin_constants array"); +} + + +/** + * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. + */ +static int +gen_one_vec(struct gen_context *gen) +{ + if (gen->one_vec < 0) { + gen->one_vec = ppc_allocate_vec_register(gen->f); + load_constant_vec(gen, gen->one_vec, 1.0f); + } + return gen->one_vec; +} + +/** + * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. + */ +static int +gen_get_bit31_vec(struct gen_context *gen) +{ + if (gen->bit31_vec < 0) { + gen->bit31_vec = ppc_allocate_vec_register(gen->f); + ppc_vspltisw(gen->f, gen->bit31_vec, -1); + ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); + } + return gen->bit31_vec; +} + + +/** + * Register fetch. Return PPC vector register with result. + */ +static int +emit_fetch(struct gen_context *gen, + const struct tgsi_full_src_register *reg, + const unsigned chan_index) +{ + uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); + int dst_vec = -1; + + switch (swizzle) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (reg->SrcRegister.File) { + case TGSI_FILE_INPUT: + { + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset_reg = emit_li_offset(gen, offset); + dst_vec = ppc_allocate_vec_register(gen->f); + ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + if (is_ppc_vec_temporary(reg)) { + /* use PPC vec register */ + dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle]; + } + else { + /* use memory-based temp register "file" */ + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset_reg = emit_li_offset(gen, offset); + dst_vec = ppc_allocate_vec_register(gen->f); + ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); + } + break; + case TGSI_FILE_IMMEDIATE: + { + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + int offset_reg = emit_li_offset(gen, offset); + dst_vec = ppc_allocate_vec_register(gen->f); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our immediates start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg); + /* splat word[swizzle] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); + } + break; + case TGSI_FILE_CONSTANT: + { + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + int offset_reg = emit_li_offset(gen, offset); + dst_vec = ppc_allocate_vec_register(gen->f); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our constants start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); + /* splat word[swizzle] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); + } + break; + default: + assert( 0 ); + } + break; + case TGSI_EXTSWIZZLE_ZERO: + ppc_vzero(gen->f, dst_vec); + break; + case TGSI_EXTSWIZZLE_ONE: + { + int one_vec = gen_one_vec(gen); + dst_vec = ppc_allocate_vec_register(gen->f); + ppc_vmove(gen->f, dst_vec, one_vec); + } + break; + default: + assert( 0 ); + } + + assert(dst_vec >= 0); + + { + uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); + if (sign_op != TGSI_UTIL_SIGN_KEEP) { + int bit31_vec = gen_get_bit31_vec(gen); + int dst_vec2; + + if (is_ppc_vec_temporary(reg)) { + /* need to use a new temp */ + dst_vec2 = ppc_allocate_vec_register(gen->f); + } + else { + dst_vec2 = dst_vec; + } + + switch (sign_op) { + case TGSI_UTIL_SIGN_CLEAR: + /* vec = vec & ~bit31 */ + ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec); + break; + case TGSI_UTIL_SIGN_SET: + /* vec = vec | bit31 */ + ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec); + break; + case TGSI_UTIL_SIGN_TOGGLE: + /* vec = vec ^ bit31 */ + ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec); + break; + default: + assert(0); + } + return dst_vec2; + } + } + + return dst_vec; +} + + + +/** + * Test if two TGSI src registers refer to the same memory location. + * We use this to avoid redundant register loads. + */ +static boolean +equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, + const struct tgsi_full_src_register *b, uint chan_b) +{ + int swz_a, swz_b; + int sign_a, sign_b; + if (a->SrcRegister.File != b->SrcRegister.File) + return FALSE; + if (a->SrcRegister.Index != b->SrcRegister.Index) + return FALSE; + swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a); + swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b); + if (swz_a != swz_b) + return FALSE; + sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a); + sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b); + if (sign_a != sign_b) + return FALSE; + return TRUE; +} + + +/** + * Given a TGSI src register and channel index, return the PPC vector + * register containing the value. We use a cache to prevent re-loading + * the same register multiple times. + * \return index of PPC vector register with the desired src operand + */ +static int +get_src_vec(struct gen_context *gen, + struct tgsi_full_instruction *inst, int src_reg, uint chan) +{ + const const struct tgsi_full_src_register *src = + &inst->FullSrcRegisters[src_reg]; + int vec; + uint i; + + /* check the cache */ + for (i = 0; i < gen->num_regs; i++) { + if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) { + /* cache hit */ + assert(gen->regs[i].vec >= 0); + return gen->regs[i].vec; + } + } + + /* cache miss: allocate new vec reg and emit fetch/load code */ + vec = emit_fetch(gen, src, chan); + gen->regs[gen->num_regs].src = *src; + gen->regs[gen->num_regs].chan = chan; + gen->regs[gen->num_regs].vec = vec; + gen->num_regs++; + + assert(gen->num_regs <= Elements(gen->regs)); + + assert(vec >= 0); + + return vec; +} + + +/** + * Clear the src operand cache. To be called at the end of each emit function. + */ +static void +release_src_vecs(struct gen_context *gen) +{ + uint i; + for (i = 0; i < gen->num_regs; i++) { + const const struct tgsi_full_src_register src = gen->regs[i].src; + if (!is_ppc_vec_temporary(&src)) { + ppc_release_vec_register(gen->f, gen->regs[i].vec); + } + } + gen->num_regs = 0; +} + + + +static int +get_dst_vec(struct gen_context *gen, + const struct tgsi_full_instruction *inst, + unsigned chan_index) +{ + const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + + if (is_ppc_vec_temporary_dst(reg)) { + int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + return vec; + } + else { + return ppc_allocate_vec_register(gen->f); + } +} + + +/** + * Register store. Store 'src_vec' at location indicated by 'reg'. + * \param free_vec Should the src_vec be released when done? + */ +static void +emit_store(struct gen_context *gen, + int src_vec, + const struct tgsi_full_instruction *inst, + unsigned chan_index, + boolean free_vec) +{ + const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + + switch (reg->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset_reg = emit_li_offset(gen, offset); + ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + if (is_ppc_vec_temporary_dst(reg)) { + if (!free_vec) { + int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + if (dst_vec != src_vec) + ppc_vmove(gen->f, dst_vec, src_vec); + } + free_vec = FALSE; + } + else { + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset_reg = emit_li_offset(gen, offset); + ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); + } + break; +#if 0 + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; +#endif + default: + assert( 0 ); + } + +#if 0 + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +#endif + + if (free_vec) + ppc_release_vec_register(gen->f, src_vec); +} + + +static void +emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0, v1; + uint chan_index; + + v0 = get_src_vec(gen, inst, 0, CHAN_X); + v1 = ppc_allocate_vec_register(gen->f); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_RSQ: + /* v1 = 1.0 / sqrt(v0) */ + ppc_vrsqrtefp(gen->f, v1, v0); + break; + case TGSI_OPCODE_RCP: + /* v1 = 1.0 / v0 */ + ppc_vrefp(gen->f, v1, v0); + break; + default: + assert(0); + } + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + emit_store(gen, v1, inst, chan_index, FALSE); + } + + release_src_vecs(gen); + ppc_release_vec_register(gen->f, v1); +} + + +static void +emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + uint chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ + int v1 = get_dst_vec(gen, inst, chan_index); + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + /* turn off the most significant bit of each vector float word */ + { + int bit31_vec = gen_get_bit31_vec(gen); + ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */ + } + break; + case TGSI_OPCODE_FLOOR: + ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ + break; + case TGSI_OPCODE_FRAC: + ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */ + ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */ + break; + case TGSI_OPCODE_EXPBASE2: + ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */ + break; + case TGSI_OPCODE_LOGBASE2: + /* XXX this may be broken! */ + ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ + break; + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + if (v0 != v1) + ppc_vmove(gen->f, v1, v0); + break; + default: + assert(0); + } + emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */ + } + + release_src_vecs(gen); +} + + +static void +emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int zero_vec = -1; + uint chan; + + if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { + zero_vec = ppc_allocate_vec_register(gen->f); + ppc_vzero(gen->f, zero_vec); + } + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + /* fetch src operands */ + int v0 = get_src_vec(gen, inst, 0, chan); + int v1 = get_src_vec(gen, inst, 1, chan); + int v2 = get_dst_vec(gen, inst, chan); + + /* emit binop */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + ppc_vaddfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_SUB: + ppc_vsubfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MUL: + ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec); + break; + case TGSI_OPCODE_MIN: + ppc_vminfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MAX: + ppc_vmaxfp(gen->f, v2, v0, v1); + break; + default: + assert(0); + } + + /* store v2 */ + emit_store(gen, v2, inst, chan, TRUE); + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) + ppc_release_vec_register(gen->f, zero_vec); + + release_src_vecs(gen); +} + + +static void +emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + uint chan; + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + /* fetch src operands */ + int v0 = get_src_vec(gen, inst, 0, chan); + int v1 = get_src_vec(gen, inst, 1, chan); + int v2 = get_src_vec(gen, inst, 2, chan); + int v3 = get_dst_vec(gen, inst, chan); + + /* emit ALU */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MAD: + ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ + break; + case TGSI_OPCODE_LRP: + ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ + ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ + break; + default: + assert(0); + } + + /* store v3 */ + emit_store(gen, v3, inst, chan, TRUE); + } + + release_src_vecs(gen); +} + + +/** + * Vector comparisons, resulting in 1.0 or 0.0 values. + */ +static void +emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + uint chan; + int one_vec = gen_one_vec(gen); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + /* fetch src operands */ + int v0 = get_src_vec(gen, inst, 0, chan); + int v1 = get_src_vec(gen, inst, 1, chan); + int v2 = get_dst_vec(gen, inst, chan); + boolean complement = FALSE; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SNE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SEQ: + ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SGE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SLT: + ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SLE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SGT: + ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */ + break; + default: + assert(0); + } + + /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ + + if (complement) + ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ + else + ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ + + /* store v2 */ + emit_store(gen, v2, inst, chan, TRUE); + } + + release_src_vecs(gen); +} + + +static void +emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0, v1, v2; + uint chan_index; + + v2 = ppc_allocate_vec_register(gen->f); + + ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ + + v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */ + v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + v0 = get_src_vec(gen, inst, 0, CHAN_Y); /* v0 = src0.YYYY */ + v1 = get_src_vec(gen, inst, 1, CHAN_Y); /* v1 = src1.YYYY */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + v0 = get_src_vec(gen, inst, 0, CHAN_Z); /* v0 = src0.ZZZZ */ + v1 = get_src_vec(gen, inst, 1, CHAN_Z); /* v1 = src1.ZZZZ */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { + v0 = get_src_vec(gen, inst, 0, CHAN_W); /* v0 = src0.WWWW */ + v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { + v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ + } + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */ + } + + release_src_vecs(gen); + + ppc_release_vec_register(gen->f, v2); +} + + +/** Approximation for vr = pow(va, vb) */ +static void +ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) +{ + /* pow(a,b) ~= exp2(log2(a) * b) */ + int t_vec = ppc_allocate_vec_register(f); + int zero_vec = ppc_allocate_vec_register(f); + + ppc_vzero(f, zero_vec); + + ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ + ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */ + ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ + + ppc_release_vec_register(f, t_vec); + ppc_release_vec_register(f, zero_vec); +} + + +static void +emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int one_vec = gen_one_vec(gen); + + /* Compute X */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + emit_store(gen, one_vec, inst, CHAN_X, FALSE); + } + + /* Compute Y, Z */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int x_vec; + int zero_vec = ppc_allocate_vec_register(gen->f); + + x_vec = get_src_vec(gen, inst, 0, CHAN_X); /* x_vec = src[0].x */ + + ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ + ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + emit_store(gen, x_vec, inst, CHAN_Y, FALSE); + } + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int y_vec, w_vec; + int z_vec = ppc_allocate_vec_register(gen->f); + int pow_vec = ppc_allocate_vec_register(gen->f); + int pos_vec = ppc_allocate_vec_register(gen->f); + int p128_vec = ppc_allocate_vec_register(gen->f); + int n128_vec = ppc_allocate_vec_register(gen->f); + + y_vec = get_src_vec(gen, inst, 0, CHAN_Y); /* y_vec = src[0].y */ + ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ + + w_vec = get_src_vec(gen, inst, 0, CHAN_W); /* w_vec = src[0].w */ + + /* clamp W to [-128, 128] */ + load_constant_vec(gen, p128_vec, 128.0f); + load_constant_vec(gen, n128_vec, -128.0f); + ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */ + ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */ + + /* if temp.x > 0 + * z = pow(tmp.y, tmp.w) + * else + * z = 0.0 + */ + ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ + ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ + ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ + + emit_store(gen, z_vec, inst, CHAN_Z, FALSE); + + ppc_release_vec_register(gen->f, z_vec); + ppc_release_vec_register(gen->f, pow_vec); + ppc_release_vec_register(gen->f, pos_vec); + ppc_release_vec_register(gen->f, p128_vec); + ppc_release_vec_register(gen->f, n128_vec); + } + + ppc_release_vec_register(gen->f, zero_vec); + } + + /* Compute W */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_store(gen, one_vec, inst, CHAN_W, FALSE); + } + + release_src_vecs(gen); +} + + +static void +emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + const int one_vec = gen_one_vec(gen); + int src_vec; + + /* get src arg */ + src_vec = get_src_vec(gen, inst, 0, CHAN_X); + + /* Compute X = 2^floor(src) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_X); + int tmp_vec = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ + ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */ + emit_store(gen, dst_vec, inst, CHAN_X, TRUE); + ppc_release_vec_register(gen->f, tmp_vec); + } + + /* Compute Y = src - floor(src) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_Y); + int tmp_vec = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ + ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec); /* dst = src - tmp */ + emit_store(gen, dst_vec, inst, CHAN_Y, TRUE); + ppc_release_vec_register(gen->f, tmp_vec); + } + + /* Compute Z = RoughApprox2ToX(src) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_Z); + ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */ + emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); + } + + /* Compute W = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_store(gen, one_vec, inst, CHAN_W, FALSE); + } + + release_src_vecs(gen); +} + + +static void +emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + const int bit31_vec = gen_get_bit31_vec(gen); + const int one_vec = gen_one_vec(gen); + int src_vec, abs_vec; + + /* get src arg */ + src_vec = get_src_vec(gen, inst, 0, CHAN_X); + + /* compute abs(src) */ + abs_vec = ppc_allocate_vec_register(gen->f); + ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */ + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + + /* compute tmp = floor(log2(abs)) */ + int tmp_vec = ppc_allocate_vec_register(gen->f); + ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */ + ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */ + + /* Compute X = tmp */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); + } + + /* Compute Y = abs / 2^tmp */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + const int zero_vec = ppc_allocate_vec_register(gen->f); + ppc_vzero(gen->f, zero_vec); + ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */ + ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */ + /* tmp = abs * tmp + zero */ + ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec); + emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); + ppc_release_vec_register(gen->f, zero_vec); + } + + ppc_release_vec_register(gen->f, tmp_vec); + } + + /* Compute Z = RoughApproxLog2(abs) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_Z); + ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */ + emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); + } + + /* Compute W = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_store(gen, one_vec, inst, CHAN_W, FALSE); + } + + ppc_release_vec_register(gen->f, abs_vec); + release_src_vecs(gen); +} + + +static void +emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int s0_vec = get_src_vec(gen, inst, 0, CHAN_X); + int s1_vec = get_src_vec(gen, inst, 1, CHAN_X); + int pow_vec = ppc_allocate_vec_register(gen->f); + int chan; + + ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + emit_store(gen, pow_vec, inst, chan, FALSE); + } + + ppc_release_vec_register(gen->f, pow_vec); + + release_src_vecs(gen); +} + + +static void +emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int x0_vec, y0_vec, z0_vec; + int x1_vec, y1_vec, z1_vec; + int zero_vec, tmp_vec; + int tmp2_vec; + + zero_vec = ppc_allocate_vec_register(gen->f); + ppc_vzero(gen->f, zero_vec); + + tmp_vec = ppc_allocate_vec_register(gen->f); + tmp2_vec = ppc_allocate_vec_register(gen->f); + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + x0_vec = get_src_vec(gen, inst, 0, CHAN_X); + x1_vec = get_src_vec(gen, inst, 1, CHAN_X); + } + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + y0_vec = get_src_vec(gen, inst, 0, CHAN_Y); + y1_vec = get_src_vec(gen, inst, 1, CHAN_Y); + } + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + z0_vec = get_src_vec(gen, inst, 0, CHAN_Z); + z1_vec = get_src_vec(gen, inst, 1, CHAN_Z); + } + + IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) { + /* tmp = y0 * z1 */ + ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec); + /* tmp = tmp - z0 * y1*/ + ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec); + emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); + } + IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) { + /* tmp = z0 * x1 */ + ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec); + /* tmp = tmp - x0 * z1 */ + ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec); + emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); + } + IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) { + /* tmp = x0 * y1 */ + ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec); + /* tmp = tmp - y0 * x1 */ + ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec); + emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE); + } + /* W is undefined */ + + ppc_release_vec_register(gen->f, tmp_vec); + ppc_release_vec_register(gen->f, zero_vec); + release_src_vecs(gen); +} + +static int +emit_instruction(struct gen_context *gen, + struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLOOR: + case TGSI_OPCODE_FRAC: + case TGSI_OPCODE_EXPBASE2: + case TGSI_OPCODE_LOGBASE2: + emit_unaryop(gen, inst); + break; + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_RCP: + emit_scalar_unaryop(gen, inst); + break; + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MAX: + emit_binop(gen, inst); + break; + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + emit_inequality(gen, inst); + break; + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_LRP: + emit_triop(gen, inst); + break; + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + emit_dotprod(gen, inst); + break; + case TGSI_OPCODE_LIT: + emit_lit(gen, inst); + break; + case TGSI_OPCODE_LOG: + emit_log(gen, inst); + break; + case TGSI_OPCODE_EXP: + emit_exp(gen, inst); + break; + case TGSI_OPCODE_POW: + emit_pow(gen, inst); + break; + case TGSI_OPCODE_XPD: + emit_xpd(gen, inst); + break; + case TGSI_OPCODE_END: + /* normal end */ + return 1; + default: + return 0; + } + return 1; +} + + +static void +emit_declaration( + struct ppc_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { +#if 0 + unsigned first, last, mask; + unsigned i, j; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } +#endif + } +} + + + +static void +emit_prologue(struct ppc_function *func) +{ + /* XXX set up stack frame */ +} + + +static void +emit_epilogue(struct ppc_function *func) +{ + ppc_comment(func, -4, "Epilogue:"); + ppc_return(func); + /* XXX restore prev stack frame */ +#if 0 + debug_printf("PPC: Emitted %u instructions\n", func->num_inst); +#endif +} + + + +/** + * Translate a TGSI vertex/fragment shader to PPC code. + * + * \param tokens the TGSI input shader + * \param func the output PPC code/function + * \param immediates buffer to place immediates, later passed to PPC func + * \return TRUE for success, FALSE if translation failed + */ +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + static int use_ppc_asm = -1; + struct tgsi_parse_context parse; + /*boolean instruction_phase = FALSE;*/ + unsigned ok = 1; + uint num_immediates = 0; + struct gen_context gen; + uint ic = 0; + + if (use_ppc_asm < 0) { + /* If GALLIUM_NOPPC is set, don't use PPC codegen */ + use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE); + } + if (!use_ppc_asm) + return FALSE; + + if (0) { + debug_printf("\n********* TGSI->PPC ********\n"); + tgsi_dump(tokens, 0); + } + + util_init_math(); + + init_gen_context(&gen, func); + + emit_prologue(func); + + tgsi_parse_init( &parse, tokens ); + + while (!tgsi_parse_end_of_tokens(&parse) && ok) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration(func, &parse.FullToken.FullDeclaration ); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (func->print) { + _debug_printf("# "); + ic++; + tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); + } + + ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? + "vertex shader" : "fragment shader"); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* splat each immediate component into a float[4] vector for SoA */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for (i = 0; i < size; i++) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } + num_immediates++; + } + break; + + default: + ok = 0; + assert( 0 ); + } + } + + emit_epilogue(func); + + tgsi_parse_free( &parse ); + + if (ppc_num_instructions(func) == 0) { + /* ran out of memory for instructions */ + ok = FALSE; + } + + if (!ok) + debug_printf("TGSI->PPC translation failed\n"); + + return ok; +} + +#endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h new file mode 100644 index 0000000000..829ec075e7 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h @@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PPC_H +#define TGSI_PPC_H + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_token; +struct ppc_function; + +extern const float ppc_builtin_constants[]; + + +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *function, + float (*immediates)[4], + boolean do_swizzles); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_PPC_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index bc7b941b78..76e773da91 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_sanity.h" #include "tgsi_info.h" #include "tgsi_iterate.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 1239f6c076..c535788819 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -43,6 +43,9 @@ /** + * Scan the given TGSI shader to collect information such as number of + * registers used, special instructions used, etc. + * \return info the result of the scan */ void tgsi_scan_shader(const struct tgsi_token *tokens, @@ -115,7 +118,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, { const struct tgsi_full_declaration *fulldecl = &parse.FullToken.FullDeclaration; - uint file = fulldecl->Declaration.File; + const uint file = fulldecl->Declaration.File; uint reg; for (reg = fulldecl->DeclarationRange.First; reg <= fulldecl->DeclarationRange.Last; @@ -131,8 +134,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_inputs++; } - - if (file == TGSI_FILE_OUTPUT) { + else if (file == TGSI_FILE_OUTPUT) { info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_outputs++; @@ -149,7 +151,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens, break; case TGSI_TOKEN_TYPE_IMMEDIATE: - info->immediate_count++; + { + uint reg = info->immediate_count++; + uint file = TGSI_FILE_IMMEDIATE; + + info->file_mask[file] |= (1 << reg); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)reg); + } break; default: diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index ff869c8312..d70bcd03c5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -25,9 +25,16 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) + +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#if defined(PIPE_ARCH_SSE) +#include "util/u_sse.h" +#endif #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" @@ -35,8 +42,6 @@ #include "rtasm/rtasm_x86sse.h" -#ifdef PIPE_ARCH_X86 - /* for 1/sqrt() * * This costs about 100fps (close to 10%) in gears: @@ -509,10 +514,31 @@ emit_coef_dady( * Function call helpers. */ +/** + * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be + * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee + * that the stack pointer is 16 byte aligned, as expected. + */ static void -emit_push_gp( - struct x86_function *func ) +emit_func_call_dst( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst, + void (PIPE_CDECL *code)() ) { + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + unsigned i, n; + unsigned xmm_mask; + + /* Bitmask of the xmm registers to save */ + xmm_mask = (1 << xmm_save) - 1; + xmm_mask &= ~(1 << xmm_dst); + + sse_movaps( + func, + get_temp( TEMP_R0, 0 ), + make_xmm( xmm_dst ) ); + x86_push( func, x86_make_reg( file_REG32, reg_AX) ); @@ -522,12 +548,49 @@ emit_push_gp( x86_push( func, x86_make_reg( file_REG32, reg_DX) ); -} + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) + ++n; + + x86_sub_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ), + make_xmm( i ) ); + ++n; + } + + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); + x86_pop(func, ecx ); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + make_xmm( i ), + x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) ); + ++n; + } + + x86_add_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); -static void -x86_pop_gp( - struct x86_function *func ) -{ /* Restore GP registers in a reverse order. */ x86_pop( @@ -539,39 +602,6 @@ x86_pop_gp( x86_pop( func, x86_make_reg( file_REG32, reg_AX) ); -} - -static void -emit_func_call_dst( - struct x86_function *func, - unsigned xmm_dst, - void (PIPE_CDECL *code)() ) -{ - sse_movaps( - func, - get_temp( TEMP_R0, 0 ), - make_xmm( xmm_dst ) ); - - emit_push_gp( - func ); - - { - struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - - x86_lea( - func, - ecx, - get_temp( TEMP_R0, 0 ) ); - - x86_push( func, ecx ); - x86_mov_reg_imm( func, ecx, (unsigned long) code ); - x86_call( func, ecx ); - x86_pop(func, ecx ); - } - - - x86_pop_gp( - func ); sse_movaps( func, @@ -582,6 +612,7 @@ emit_func_call_dst( static void emit_func_call_dst_src( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst, unsigned xmm_src, void (PIPE_CDECL *code)() ) @@ -593,10 +624,119 @@ emit_func_call_dst_src( emit_func_call_dst( func, + xmm_save, xmm_dst, code ); } + +#if defined(PIPE_ARCH_SSE) + +/* + * Fast SSE2 implementation of special math functions. + */ + +#define POLY0(x, c0) _mm_set1_ps(c0) +#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0)) +#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0)) +#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0)) +#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0)) +#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0)) + +#define EXP_POLY_DEGREE 3 +#define LOG_POLY_DEGREE 5 + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +exp2f4(__m128 x) +{ + __m128i ipart; + __m128 fpart, expipart, expfpart; + + x = _mm_min_ps(x, _mm_set1_ps( 129.00000f)); + x = _mm_max_ps(x, _mm_set1_ps(-126.99999f)); + + /* ipart = int(x - 0.5) */ + ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f))); + + /* fpart = x - ipart */ + fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart)); + + /* expipart = (float) (1 << ipart) */ + expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23)); + + /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */ +#if EXP_POLY_DEGREE == 5 + expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f); +#elif EXP_POLY_DEGREE == 4 + expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f); +#elif EXP_POLY_DEGREE == 3 + expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f); +#elif EXP_POLY_DEGREE == 2 + expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f); +#else +#error +#endif + + return _mm_mul_ps(expipart, expfpart); +} + + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +log2f4(__m128 x) +{ + __m128i expmask = _mm_set1_epi32(0x7f800000); + __m128i mantmask = _mm_set1_epi32(0x007fffff); + __m128 one = _mm_set1_ps(1.0f); + + __m128i i = _mm_castps_si128(x); + + /* exp = (float) exponent(x) */ + __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127))); + + /* mant = (float) mantissa(x) */ + __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one); + + __m128 logmant; + + /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ + * These coefficients can be generate with + * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html + */ +#if LOG_POLY_DEGREE == 6 + logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f); +#elif LOG_POLY_DEGREE == 5 + logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); +#elif LOG_POLY_DEGREE == 4 + logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); +#elif LOG_POLY_DEGREE == 3 + logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); +#else +#error +#endif + + /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ + logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one)); + + return _mm_add_ps(logmant, exp); +} + + +static INLINE __m128 +powf4(__m128 x, __m128 y) +{ + return exp2f4(_mm_mul_ps(log2f4(x), y)); +} + +#endif /* PIPE_ARCH_SSE */ + + + /** * Low-level instruction translators. */ @@ -639,38 +779,42 @@ cos4f( static void emit_cos( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, cos4f ); } static void PIPE_CDECL +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE) +__attribute__((force_align_arg_pointer)) +#endif ex24f( float *store ) { -#if FAST_MATH +#if defined(PIPE_ARCH_SSE) + _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) )); +#else store[0] = util_fast_exp2( store[0] ); store[1] = util_fast_exp2( store[1] ); store[2] = util_fast_exp2( store[2] ); store[3] = util_fast_exp2( store[3] ); -#else - store[0] = powf( 2.0f, store[0] ); - store[1] = powf( 2.0f, store[1] ); - store[2] = powf( 2.0f, store[2] ); - store[3] = powf( 2.0f, store[3] ); #endif } static void emit_ex2( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, ex24f ); } @@ -710,10 +854,12 @@ flr4f( static void emit_flr( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, flr4f ); } @@ -731,31 +877,42 @@ frc4f( static void emit_frc( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, frc4f ); } static void PIPE_CDECL +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE) +__attribute__((force_align_arg_pointer)) +#endif lg24f( float *store ) { +#if defined(PIPE_ARCH_SSE) + _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) )); +#else store[0] = util_fast_log2( store[0] ); store[1] = util_fast_log2( store[1] ); store[2] = util_fast_log2( store[2] ); store[3] = util_fast_log2( store[3] ); +#endif } static void emit_lg2( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, lg24f ); } @@ -797,30 +954,32 @@ emit_neg( } static void PIPE_CDECL +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE) +__attribute__((force_align_arg_pointer)) +#endif pow4f( float *store ) { -#if FAST_MATH +#if defined(PIPE_ARCH_SSE) + _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) )); +#else store[0] = util_fast_pow( store[0], store[4] ); store[1] = util_fast_pow( store[1], store[5] ); store[2] = util_fast_pow( store[2], store[6] ); store[3] = util_fast_pow( store[3], store[7] ); -#else - store[0] = powf( store[0], store[4] ); - store[1] = powf( store[1], store[5] ); - store[2] = powf( store[2], store[6] ); - store[3] = powf( store[3], store[7] ); #endif } static void emit_pow( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst, unsigned xmm_src ) { emit_func_call_dst_src( func, + xmm_save, xmm_dst, xmm_src, pow4f ); @@ -855,10 +1014,12 @@ rnd4f( static void emit_rnd( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, rnd4f ); } @@ -935,10 +1096,12 @@ sgn4f( static void emit_sgn( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, sgn4f ); } @@ -955,10 +1118,12 @@ sin4f( static void emit_sin (struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst) { emit_func_call_dst( func, + xmm_save, xmm_dst, sin4f ); } @@ -1378,7 +1543,7 @@ emit_instruction( get_temp( TGSI_EXEC_TEMP_MINUS_128_I, TGSI_EXEC_TEMP_MINUS_128_C ) ); - emit_pow( func, 1, 2 ); + emit_pow( func, 3, 1, 2 ); FETCH( func, *inst, 0, 0, CHAN_X ); sse_xorps( func, @@ -1410,6 +1575,7 @@ emit_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( func, *inst, 0, 0, CHAN_X ); + emit_abs( func, 0 ); emit_rsqrt( func, 1, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 1, 0, chan_index ); @@ -1424,11 +1590,11 @@ emit_instruction( if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { emit_MOV( func, 1, 0 ); - emit_flr( func, 1 ); + emit_flr( func, 2, 1 ); /* dst.x = ex2(floor(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { emit_MOV( func, 2, 1 ); - emit_ex2( func, 2 ); + emit_ex2( func, 3, 2 ); STORE( func, *inst, 2, 0, CHAN_X ); } /* dst.y = src.x - floor(src.x) */ @@ -1440,7 +1606,7 @@ emit_instruction( } /* dst.z = ex2(src.x) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - emit_ex2( func, 0 ); + emit_ex2( func, 3, 0 ); STORE( func, *inst, 0, 0, CHAN_Z ); } } @@ -1458,21 +1624,21 @@ emit_instruction( FETCH( func, *inst, 0, 0, CHAN_X ); emit_abs( func, 0 ); emit_MOV( func, 1, 0 ); - emit_lg2( func, 1 ); + emit_lg2( func, 2, 1 ); /* dst.z = lg2(abs(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { STORE( func, *inst, 1, 0, CHAN_Z ); } if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_flr( func, 1 ); + emit_flr( func, 2, 1 ); /* dst.x = floor(lg2(abs(src.x))) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { STORE( func, *inst, 1, 0, CHAN_X ); } /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_ex2( func, 1 ); + emit_ex2( func, 2, 1 ); emit_rcp( func, 1, 1 ); emit_mul( func, 0, 1 ); STORE( func, *inst, 0, 0, CHAN_Y ); @@ -1647,7 +1813,18 @@ emit_instruction( case TGSI_OPCODE_DOT2ADD: /* TGSI_OPCODE_DP2A */ - return 0; + FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ + FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ + emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */ + FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */ + FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */ + emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */ + emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + FETCH( func, *inst, 1, 2, CHAN_X ); /* xmm1 = src[2].x */ + emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */ + } break; case TGSI_OPCODE_INDEX: @@ -1662,7 +1839,7 @@ emit_instruction( /* TGSI_OPCODE_FRC */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_frc( func, 0 ); + emit_frc( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1675,7 +1852,7 @@ emit_instruction( /* TGSI_OPCODE_FLR */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_flr( func, 0 ); + emit_flr( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1683,7 +1860,7 @@ emit_instruction( case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_rnd( func, 0 ); + emit_rnd( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1691,7 +1868,7 @@ emit_instruction( case TGSI_OPCODE_EXPBASE2: /* TGSI_OPCODE_EX2 */ FETCH( func, *inst, 0, 0, CHAN_X ); - emit_ex2( func, 0 ); + emit_ex2( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1700,7 +1877,7 @@ emit_instruction( case TGSI_OPCODE_LOGBASE2: /* TGSI_OPCODE_LG2 */ FETCH( func, *inst, 0, 0, CHAN_X ); - emit_lg2( func, 0 ); + emit_lg2( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1710,7 +1887,7 @@ emit_instruction( /* TGSI_OPCODE_POW */ FETCH( func, *inst, 0, 0, CHAN_X ); FETCH( func, *inst, 1, 1, CHAN_X ); - emit_pow( func, 0, 1 ); + emit_pow( func, 0, 0, 1 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1801,7 +1978,7 @@ emit_instruction( case TGSI_OPCODE_COS: FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); + emit_cos( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1860,7 +2037,7 @@ emit_instruction( case TGSI_OPCODE_SIN: FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); + emit_sin( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1927,7 +2104,7 @@ emit_instruction( case TGSI_OPCODE_ARR: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_rnd( func, 0 ); + emit_rnd( func, 0, 0 ); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } @@ -1952,7 +2129,7 @@ emit_instruction( /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_sgn( func, 0 ); + emit_sgn( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1964,12 +2141,12 @@ emit_instruction( case TGSI_OPCODE_SCS: IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); + emit_cos( func, 0, 0 ); STORE( func, *inst, 0, 0, CHAN_X ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); + emit_sin( func, 0, 0 ); STORE( func, *inst, 0, 0, CHAN_Y ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { @@ -1995,7 +2172,39 @@ emit_instruction( break; case TGSI_OPCODE_NRM: - return 0; + /* fall-through */ + case TGSI_OPCODE_NRM4: + /* 3 or 4-component normalization */ + { + uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; + /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */ + FETCH( func, *inst, 4, 0, CHAN_X ); /* xmm4 = src[0].x */ + FETCH( func, *inst, 5, 0, CHAN_Y ); /* xmm5 = src[0].y */ + FETCH( func, *inst, 6, 0, CHAN_Z ); /* xmm6 = src[0].z */ + if (dims == 4) { + FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */ + } + emit_MOV( func, 0, 4 ); /* xmm0 = xmm3 */ + emit_mul( func, 0, 4 ); /* xmm0 *= xmm3 */ + emit_MOV( func, 1, 5 ); /* xmm1 = xmm4 */ + emit_mul( func, 1, 5 ); /* xmm1 *= xmm4 */ + emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ + emit_MOV( func, 1, 6 ); /* xmm1 = xmm5 */ + emit_mul( func, 1, 6 ); /* xmm1 *= xmm5 */ + emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ + if (dims == 4) { + emit_MOV( func, 1, 7 ); /* xmm1 = xmm7 */ + emit_mul( func, 1, 7 ); /* xmm1 *= xmm7 */ + emit_add( func, 0, 0 ); /* xmm0 += xmm1 */ + } + emit_rsqrt( func, 1, 0 ); /* xmm1 = 1/sqrt(xmm0) */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + if (chan_index < dims) { + emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */ + STORE( func, *inst, 4+chan_index, 0, chan_index ); + } + } + } break; case TGSI_OPCODE_DIV: @@ -2003,7 +2212,16 @@ emit_instruction( break; case TGSI_OPCODE_DP2: - return 0; + FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ + FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ + emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */ + FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */ + FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */ + emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */ + emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */ + } break; case TGSI_OPCODE_TXL: @@ -2454,7 +2672,7 @@ tgsi_emit_sse2( case TGSI_TOKEN_TYPE_IMMEDIATE: /* simply copy the immediate values into the next immediates[] slot */ { - const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; uint i; assert(size <= 4); assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 9454563361..58fe07c11d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" @@ -1023,7 +1023,7 @@ static boolean parse_immediate( struct translate_ctx *ctx ) ctx->cur++; imm = tgsi_default_full_immediate(); - imm.Immediate.Size += 4; + imm.Immediate.NrTokens += 4; imm.Immediate.DataType = TGSI_IMM_FLOAT32; imm.u.Pointer = values; diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index ea87da31e5..062c1be938 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -31,7 +31,7 @@ * Authors: Brian Paul */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_transform.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 50101a9bb0..71f8a6ca40 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 5c227c1eb5..671e671df2 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -4,7 +4,7 @@ include $(TOP)/configs/current LIBNAME = util C_SOURCES = \ - p_debug.c \ + u_debug.c \ u_blit.c \ u_cache.c \ u_draw_quad.c \ @@ -12,6 +12,8 @@ C_SOURCES = \ u_handle_table.c \ u_hash_table.c \ u_hash.c \ + u_keymap.c \ + u_linear.c \ u_math.c \ u_mm.c \ u_rect.c \ @@ -21,7 +23,8 @@ C_SOURCES = \ u_stream_wd.c \ u_tile.c \ u_time.c \ - u_timed_winsys.c + u_timed_winsys.c \ + u_simple_screen.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 1ef06631bf..84e4c48476 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -3,25 +3,28 @@ Import('*') util = env.ConvenienceLibrary( target = 'util', source = [ - 'p_debug.c', - 'p_debug_mem.c', - 'p_debug_prof.c', 'u_blit.c', 'u_cache.c', + 'u_debug.c', + 'u_debug_memory.c', + 'u_debug_profile.c', 'u_draw_quad.c', 'u_gen_mipmap.c', 'u_handle_table.c', 'u_hash.c', 'u_hash_table.c', + 'u_keymap.c', 'u_math.c', 'u_mm.c', 'u_rect.c', 'u_simple_shaders.c', 'u_snprintf.c', - 'u_stream_stdc.c', - 'u_stream_wd.c', + 'u_stream_stdc.c', + 'u_stream_wd.c', 'u_tile.c', 'u_time.c', + 'u_timed_winsys.c', + 'u_simple_screen.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 2cef3338b5..efc3a874cc 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -34,10 +34,9 @@ #include "pipe/p_context.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" #include "util/u_blit.h" @@ -415,7 +414,7 @@ util_blit_pixels(struct blit_state *ctx, memset(&fb, 0, sizeof(fb)); fb.width = dst->width; fb.height = dst->height; - fb.num_cbufs = 1; + fb.nr_cbufs = 1; fb.cbufs[0] = dst; cso_set_framebuffer(ctx->cso, &fb); @@ -526,7 +525,7 @@ util_blit_pixels_tex(struct blit_state *ctx, memset(&fb, 0, sizeof(fb)); fb.width = dst->width; fb.height = dst->height; - fb.num_cbufs = 1; + fb.nr_cbufs = 1; fb.cbufs[0] = dst; cso_set_framebuffer(ctx->cso, &fb); diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c index 0a1a64259f..41cd38171f 100644 --- a/src/gallium/auxiliary/util/u_cache.c +++ b/src/gallium/auxiliary/util/u_cache.c @@ -36,7 +36,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/u_debug.c index 125f3daf00..43d424b1d6 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (c) 2008 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -36,6 +37,13 @@ #include <windows.h> #include <winddi.h> +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +#include <stdio.h> +#include <stdlib.h> +#include <windows.h> +#include <types.h> + #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) #ifndef WIN32_LEAN_AND_MEAN @@ -51,7 +59,7 @@ #endif #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_format.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" @@ -98,7 +106,35 @@ void _debug_vprintf(const char *format, va_list ap) OutputDebugStringA(buf); buf[0] = '\0'; } -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + wchar_t *wide_format; + long wide_str_len; + char buf[512]; + int ret; +#if (_WIN32_WCE < 600) + ret = vsprintf(buf, format, ap); + if(ret < 0){ + sprintf(buf, "Cant handle debug print!"); + ret = 25; + } +#else + ret = vsprintf_s(buf, 512, format, ap); + if(ret < 0){ + sprintf_s(buf, 512, "Cant handle debug print!"); + ret = 25; + } +#endif + buf[ret] = '\0'; + /* Format is ascii - needs to be converted to wchar_t for printing */ + wide_str_len = MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, NULL, 0); + wide_format = (wchar_t *) malloc((wide_str_len+1) * sizeof(wchar_t)); + if (wide_format) { + MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, + wide_format, wide_str_len); + NKDbgPrintfW(wide_format, wide_format); + free(wide_format); + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) /* TODO */ #else /* !PIPE_SUBSYSTEM_WINDOWS */ #ifdef DEBUG @@ -308,6 +344,13 @@ debug_get_flags_option(const char *name, str = _debug_get_option(name); if(!str) result = dfault; + else if (!util_strcmp(str, "help")) { + result = dfault; + while (flags->name) { + debug_printf("%s: help for %s: %s [0x%lx]\n", __FUNCTION__, name, flags->name, flags->value); + flags++; + } + } else { result = 0; while( flags->name ) { @@ -317,7 +360,12 @@ debug_get_flags_option(const char *name, } } - debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + if (str) { + debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str); + } + else { + debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + } return result; } @@ -358,6 +406,32 @@ debug_dump_enum(const struct debug_named_value *names, const char * +debug_dump_enum_noprefix(const struct debug_named_value *names, + const char *prefix, + unsigned long value) +{ + static char rest[64]; + + while(names->name) { + if(names->value == value) { + const char *name = names->name; + while (*name == *prefix) { + name++; + prefix++; + } + return name; + } + ++names; + } + + + + util_snprintf(rest, sizeof(rest), "0x%08lx", value); + return rest; +} + + +const char * debug_dump_flags(const struct debug_named_value *names, unsigned long value) { @@ -486,16 +560,24 @@ static const struct debug_named_value pipe_format_names[] = { DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SSCALED), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SSCALED), DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_SRGB), - DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_L8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8L8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8R8G8B8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8R8G8B8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8A8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8X8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_X8UB8UG8SR8S_NORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_B6UG5SR5S_NORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_RGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_SRGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_SRGBA), #endif DEBUG_NAMED_VALUE_END }; @@ -627,6 +709,7 @@ void debug_dump_surface_bmp(const char *filename, struct pipe_surface *surface) { +#ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT struct util_stream *stream; unsigned surface_usage; struct bmp_file_header bmfh; @@ -693,6 +776,7 @@ error2: FREE(rgba); error1: ; +#endif } #endif diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h new file mode 100644 index 0000000000..b298b9b66d --- /dev/null +++ b/src/gallium/auxiliary/util/u_debug.h @@ -0,0 +1,361 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Cross-platform debugging helpers. + * + * For now it just has assert and printf replacements, but it might be extended + * with stack trace reports and more advanced logging in the near future. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_DEBUG_H_ +#define U_DEBUG_H_ + + +#include <stdarg.h> + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(DBG) || defined(DEBUG) +#ifndef DEBUG +#define DEBUG 1 +#endif +#else +#ifndef NDEBUG +#define NDEBUG 1 +#endif +#endif + + +/* MSVC bebore VC7 does not have the __FUNCTION__ macro */ +#if defined(_MSC_VER) && _MSC_VER < 1300 +#define __FUNCTION__ "???" +#endif + + +void _debug_vprintf(const char *format, va_list ap); + + +static INLINE void +_debug_printf(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + _debug_vprintf(format, ap); + va_end(ap); +} + + +/** + * Print debug messages. + * + * The actual channel used to output debug message is platform specific. To + * avoid misformating or truncation, follow these rules of thumb: + * - output whole lines + * - avoid outputing large strings (512 bytes is the current maximum length + * that is guaranteed to be printed in all platforms) + */ +static INLINE void +debug_printf(const char *format, ...) +{ +#ifdef DEBUG + va_list ap; + va_start(ap, format); + _debug_vprintf(format, ap); + va_end(ap); +#else + (void) format; /* silence warning */ +#endif +} + + +#ifdef DEBUG +#define debug_vprintf(_format, _ap) _debug_vprintf(_format, _ap) +#else +#define debug_vprintf(_format, _ap) ((void)0) +#endif + + +#ifdef DEBUG +/** + * Dump a blob in hex to the same place that debug_printf sends its + * messages. + */ +void debug_print_blob( const char *name, const void *blob, unsigned size ); + +/* Print a message along with a prettified format string + */ +void debug_print_format(const char *msg, unsigned fmt ); +#else +#define debug_print_blob(_name, _blob, _size) ((void)0) +#define debug_print_format(_msg, _fmt) ((void)0) +#endif + + +void _debug_break(void); + + +/** + * Hard-coded breakpoint. + */ +#ifdef DEBUG +#if defined(PIPE_ARCH_X86) && defined(PIPE_CC_GCC) +#define debug_break() __asm("int3") +#elif defined(PIPE_ARCH_X86) && defined(PIPE_CC_MSVC) +#define debug_break() do { _asm {int 3} } while(0) +#else +#define debug_break() _debug_break() +#endif +#else /* !DEBUG */ +#define debug_break() ((void)0) +#endif /* !DEBUG */ + + +long +debug_get_num_option(const char *name, long dfault); + +void _debug_assert_fail(const char *expr, + const char *file, + unsigned line, + const char *function); + + +/** + * Assert macro + * + * Do not expect that the assert call terminates -- errors must be handled + * regardless of assert behavior. + */ +#ifdef DEBUG +#define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__)) +#else +#define debug_assert(expr) ((void)0) +#endif + + +/** Override standard assert macro */ +#ifdef assert +#undef assert +#endif +#define assert(expr) debug_assert(expr) + + +/** + * Output the current function name. + */ +#ifdef DEBUG +#define debug_checkpoint() \ + _debug_printf("%s\n", __FUNCTION__) +#else +#define debug_checkpoint() \ + ((void)0) +#endif + + +/** + * Output the full source code position. + */ +#ifdef DEBUG +#define debug_checkpoint_full() \ + _debug_printf("%s:%u:%s", __FILE__, __LINE__, __FUNCTION__) +#else +#define debug_checkpoint_full() \ + ((void)0) +#endif + + +/** + * Output a warning message. Muted on release version. + */ +#ifdef DEBUG +#define debug_warning(__msg) \ + _debug_printf("%s:%u:%s: warning: %s\n", __FILE__, __LINE__, __FUNCTION__, __msg) +#else +#define debug_warning(__msg) \ + ((void)0) +#endif + + +/** + * Output an error message. Not muted on release version. + */ +#ifdef DEBUG +#define debug_error(__msg) \ + _debug_printf("%s:%u:%s: error: %s\n", __FILE__, __LINE__, __FUNCTION__, __msg) +#else +#define debug_error(__msg) \ + _debug_printf("error: %s\n", __msg) +#endif + + +/** + * Used by debug_dump_enum and debug_dump_flags to describe symbols. + */ +struct debug_named_value +{ + const char *name; + unsigned long value; +}; + + +/** + * Some C pre-processor magic to simplify creating named values. + * + * Example: + * @code + * static const debug_named_value my_names[] = { + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_X), + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Y), + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Z), + * DEBUG_NAMED_VALUE_END + * }; + * + * ... + * debug_printf("%s = %s\n", + * name, + * debug_dump_enum(my_names, my_value)); + * ... + * @endcode + */ +#define DEBUG_NAMED_VALUE(__symbol) {#__symbol, (unsigned long)__symbol} +#define DEBUG_NAMED_VALUE_END {NULL, 0} + + +/** + * Convert a enum value to a string. + */ +const char * +debug_dump_enum(const struct debug_named_value *names, + unsigned long value); + +const char * +debug_dump_enum_noprefix(const struct debug_named_value *names, + const char *prefix, + unsigned long value); + + +/** + * Convert binary flags value to a string. + */ +const char * +debug_dump_flags(const struct debug_named_value *names, + unsigned long value); + + +/** + * Get option. + * + * It is an alias for getenv on Linux. + * + * On Windows it reads C:\gallium.cfg, which is a text file with CR+LF line + * endings with one option per line as + * + * NAME=value + * + * This file must be terminated with an extra empty line. + */ +const char * +debug_get_option(const char *name, const char *dfault); + +boolean +debug_get_bool_option(const char *name, boolean dfault); + +long +debug_get_num_option(const char *name, long dfault); + +unsigned long +debug_get_flags_option(const char *name, + const struct debug_named_value *flags, + unsigned long dfault); + + +void * +debug_malloc(const char *file, unsigned line, const char *function, + size_t size); + +void +debug_free(const char *file, unsigned line, const char *function, + void *ptr); + +void * +debug_calloc(const char *file, unsigned line, const char *function, + size_t count, size_t size ); + +void * +debug_realloc(const char *file, unsigned line, const char *function, + void *old_ptr, size_t old_size, size_t new_size ); + +unsigned long +debug_memory_begin(void); + +void +debug_memory_end(unsigned long beginning); + + +#if defined(PROFILE) && defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void +debug_profile_start(void); + +void +debug_profile_stop(void); + +#endif + + +#ifdef DEBUG +struct pipe_surface; +void debug_dump_image(const char *prefix, + unsigned format, unsigned cpp, + unsigned width, unsigned height, + unsigned stride, + const void *data); +void debug_dump_surface(const char *prefix, + struct pipe_surface *surface); +void debug_dump_surface_bmp(const char *filename, + struct pipe_surface *surface); +#else +#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0) +#define debug_dump_surface(prefix, surface) ((void)0) +#define debug_dump_surface_bmp(filename, surface) ((void)0) +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_DEBUG_H_ */ diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/u_debug_memory.c index 250fd60f63..f6c136f6e5 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/u_debug_memory.c @@ -44,7 +44,7 @@ #include <stdlib.h> #endif -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_double_list.h" diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/u_debug_profile.c index 5f9772ef91..5f9772ef91 100644 --- a/src/gallium/auxiliary/util/p_debug_prof.c +++ b/src/gallium/auxiliary/util/u_debug_profile.c diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index d7bb74b87b..f282f3d289 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -29,7 +29,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" #include "util/u_draw_quad.h" @@ -53,7 +52,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* tell pipe about the vertex buffer */ vbuffer.buffer = vbuf; - vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */ + vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = offset; pipe->set_vertex_buffers(pipe, 1, &vbuffer); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b0de5968e9..7c20c7ce7b 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -2,6 +2,7 @@ * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -34,10 +35,9 @@ #include "pipe/p_context.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" #include "util/u_memory.h" @@ -93,13 +93,82 @@ enum dtype typedef ushort half_float; -#if 0 -extern half_float -float_to_half(float f); +static half_float +float_to_half(float f) +{ + /* XXX fix this */ + return 0; +} + +static float +half_to_float(half_float h) +{ + /* XXX fix this */ + return 0.0f; +} + -extern float -half_to_float(half_float h); -#endif + + +/** + * \name Support macros for do_row and do_row_3d + * + * The macro madness is here for two reasons. First, it compacts the code + * slightly. Second, it makes it much easier to adjust the specifics of the + * filter to tune the rounding characteristics. + */ +/*@{*/ +#define DECLARE_ROW_POINTERS(t, e) \ + const t(*rowA)[e] = (const t(*)[e]) srcRowA; \ + const t(*rowB)[e] = (const t(*)[e]) srcRowB; \ + const t(*rowC)[e] = (const t(*)[e]) srcRowC; \ + const t(*rowD)[e] = (const t(*)[e]) srcRowD; \ + t(*dst)[e] = (t(*)[e]) dstRow + +#define DECLARE_ROW_POINTERS0(t) \ + const t *rowA = (const t *) srcRowA; \ + const t *rowB = (const t *) srcRowB; \ + const t *rowC = (const t *) srcRowC; \ + const t *rowD = (const t *) srcRowD; \ + t *dst = (t *) dstRow + +#define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \ + ((unsigned) Aj + (unsigned) Ak \ + + (unsigned) Bj + (unsigned) Bk \ + + (unsigned) Cj + (unsigned) Ck \ + + (unsigned) Dj + (unsigned) Dk \ + + 4) >> 3 + +#define FILTER_3D(e) \ + do { \ + dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \ + rowB[j][e], rowB[k][e], \ + rowC[j][e], rowC[k][e], \ + rowD[j][e], rowD[k][e]); \ + } while(0) + +#define FILTER_F_3D(e) \ + do { \ + dst[i][e] = (rowA[j][e] + rowA[k][e] \ + + rowB[j][e] + rowB[k][e] \ + + rowC[j][e] + rowC[k][e] \ + + rowD[j][e] + rowD[k][e]) * 0.125F; \ + } while(0) + +#define FILTER_HF_3D(e) \ + do { \ + const float aj = half_to_float(rowA[j][e]); \ + const float ak = half_to_float(rowA[k][e]); \ + const float bj = half_to_float(rowB[j][e]); \ + const float bk = half_to_float(rowB[k][e]); \ + const float cj = half_to_float(rowC[j][e]); \ + const float ck = half_to_float(rowC[k][e]); \ + const float dj = half_to_float(rowD[j][e]); \ + const float dk = half_to_float(rowD[k][e]); \ + dst[i][e] = float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \ + * 0.125F); \ + } while(0) +/*@}*/ /** @@ -471,6 +540,385 @@ do_row(enum dtype datatype, uint comps, int srcWidth, } +/** + * Average together four rows of a source image to produce a single new + * row in the dest image. It's legal for the two source rows to point + * to the same data. The source width must be equal to either the + * dest width or two times the dest width. + * + * \param datatype GL pixel type \c GL_UNSIGNED_BYTE, \c GL_UNSIGNED_SHORT, + * \c GL_FLOAT, etc. + * \param comps number of components per pixel (1..4) + * \param srcWidth Width of a row in the source data + * \param srcRowA Pointer to one of the rows of source data + * \param srcRowB Pointer to one of the rows of source data + * \param srcRowC Pointer to one of the rows of source data + * \param srcRowD Pointer to one of the rows of source data + * \param dstWidth Width of a row in the destination data + * \param srcRowA Pointer to the row of destination data + */ +static void +do_row_3D(enum dtype datatype, uint comps, int srcWidth, + const void *srcRowA, const void *srcRowB, + const void *srcRowC, const void *srcRowD, + int dstWidth, void *dstRow) +{ + const uint k0 = (srcWidth == dstWidth) ? 0 : 1; + const uint colStride = (srcWidth == dstWidth) ? 1 : 2; + uint i, j, k; + + assert(comps >= 1); + assert(comps <= 4); + + if ((datatype == UBYTE) && (comps == 4)) { + DECLARE_ROW_POINTERS(ubyte, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + FILTER_3D(3); + } + } + else if ((datatype == UBYTE) && (comps == 3)) { + DECLARE_ROW_POINTERS(ubyte, 3); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + } + } + else if ((datatype == UBYTE) && (comps == 2)) { + DECLARE_ROW_POINTERS(ubyte, 2); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + } + } + else if ((datatype == UBYTE) && (comps == 1)) { + DECLARE_ROW_POINTERS(ubyte, 1); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + } + } + else if ((datatype == USHORT) && (comps == 4)) { + DECLARE_ROW_POINTERS(ushort, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + FILTER_3D(3); + } + } + else if ((datatype == USHORT) && (comps == 3)) { + DECLARE_ROW_POINTERS(ushort, 3); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + } + } + else if ((datatype == USHORT) && (comps == 2)) { + DECLARE_ROW_POINTERS(ushort, 2); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + } + } + else if ((datatype == USHORT) && (comps == 1)) { + DECLARE_ROW_POINTERS(ushort, 1); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + } + } + else if ((datatype == FLOAT) && (comps == 4)) { + DECLARE_ROW_POINTERS(float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + FILTER_F_3D(1); + FILTER_F_3D(2); + FILTER_F_3D(3); + } + } + else if ((datatype == FLOAT) && (comps == 3)) { + DECLARE_ROW_POINTERS(float, 3); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + FILTER_F_3D(1); + FILTER_F_3D(2); + } + } + else if ((datatype == FLOAT) && (comps == 2)) { + DECLARE_ROW_POINTERS(float, 2); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + FILTER_F_3D(1); + } + } + else if ((datatype == FLOAT) && (comps == 1)) { + DECLARE_ROW_POINTERS(float, 1); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 4)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + FILTER_HF_3D(1); + FILTER_HF_3D(2); + FILTER_HF_3D(3); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 3)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + FILTER_HF_3D(1); + FILTER_HF_3D(2); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 2)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + FILTER_HF_3D(1); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 1)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + } + } + else if ((datatype == UINT) && (comps == 1)) { + const uint *rowA = (const uint *) srcRowA; + const uint *rowB = (const uint *) srcRowB; + const uint *rowC = (const uint *) srcRowC; + const uint *rowD = (const uint *) srcRowD; + float *dst = (float *) dstRow; + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k]) + + ((uint64_t) rowB[j] + (uint64_t) rowB[k]) + + ((uint64_t) rowC[j] + (uint64_t) rowC[k]) + + ((uint64_t) rowD[j] + (uint64_t) rowD[k])); + dst[i] = (float)((double) tmp * 0.125); + } + } + else if ((datatype == USHORT_5_6_5) && (comps == 3)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0x1f; + const int rowCr0 = rowC[j] & 0x1f; + const int rowCr1 = rowC[k] & 0x1f; + const int rowDr0 = rowD[j] & 0x1f; + const int rowDr1 = rowD[k] & 0x1f; + const int rowAg0 = (rowA[j] >> 5) & 0x3f; + const int rowAg1 = (rowA[k] >> 5) & 0x3f; + const int rowBg0 = (rowB[j] >> 5) & 0x3f; + const int rowBg1 = (rowB[k] >> 5) & 0x3f; + const int rowCg0 = (rowC[j] >> 5) & 0x3f; + const int rowCg1 = (rowC[k] >> 5) & 0x3f; + const int rowDg0 = (rowD[j] >> 5) & 0x3f; + const int rowDg1 = (rowD[k] >> 5) & 0x3f; + const int rowAb0 = (rowA[j] >> 11) & 0x1f; + const int rowAb1 = (rowA[k] >> 11) & 0x1f; + const int rowBb0 = (rowB[j] >> 11) & 0x1f; + const int rowBb1 = (rowB[k] >> 11) & 0x1f; + const int rowCb0 = (rowC[j] >> 11) & 0x1f; + const int rowCb1 = (rowC[k] >> 11) & 0x1f; + const int rowDb0 = (rowD[j] >> 11) & 0x1f; + const int rowDb1 = (rowD[k] >> 11) & 0x1f; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + dst[i] = (b << 11) | (g << 5) | r; + } + } + else if ((datatype == USHORT_4_4_4_4) && (comps == 4)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0xf; + const int rowAr1 = rowA[k] & 0xf; + const int rowBr0 = rowB[j] & 0xf; + const int rowBr1 = rowB[k] & 0xf; + const int rowCr0 = rowC[j] & 0xf; + const int rowCr1 = rowC[k] & 0xf; + const int rowDr0 = rowD[j] & 0xf; + const int rowDr1 = rowD[k] & 0xf; + const int rowAg0 = (rowA[j] >> 4) & 0xf; + const int rowAg1 = (rowA[k] >> 4) & 0xf; + const int rowBg0 = (rowB[j] >> 4) & 0xf; + const int rowBg1 = (rowB[k] >> 4) & 0xf; + const int rowCg0 = (rowC[j] >> 4) & 0xf; + const int rowCg1 = (rowC[k] >> 4) & 0xf; + const int rowDg0 = (rowD[j] >> 4) & 0xf; + const int rowDg1 = (rowD[k] >> 4) & 0xf; + const int rowAb0 = (rowA[j] >> 8) & 0xf; + const int rowAb1 = (rowA[k] >> 8) & 0xf; + const int rowBb0 = (rowB[j] >> 8) & 0xf; + const int rowBb1 = (rowB[k] >> 8) & 0xf; + const int rowCb0 = (rowC[j] >> 8) & 0xf; + const int rowCb1 = (rowC[k] >> 8) & 0xf; + const int rowDb0 = (rowD[j] >> 8) & 0xf; + const int rowDb1 = (rowD[k] >> 8) & 0xf; + const int rowAa0 = (rowA[j] >> 12) & 0xf; + const int rowAa1 = (rowA[k] >> 12) & 0xf; + const int rowBa0 = (rowB[j] >> 12) & 0xf; + const int rowBa1 = (rowB[k] >> 12) & 0xf; + const int rowCa0 = (rowC[j] >> 12) & 0xf; + const int rowCa1 = (rowC[k] >> 12) & 0xf; + const int rowDa0 = (rowD[j] >> 12) & 0xf; + const int rowDa1 = (rowD[k] >> 12) & 0xf; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1, + rowCa0, rowCa1, rowDa0, rowDa1); + + dst[i] = (a << 12) | (b << 8) | (g << 4) | r; + } + } + else if ((datatype == USHORT_1_5_5_5_REV) && (comps == 4)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0x1f; + const int rowCr0 = rowC[j] & 0x1f; + const int rowCr1 = rowC[k] & 0x1f; + const int rowDr0 = rowD[j] & 0x1f; + const int rowDr1 = rowD[k] & 0x1f; + const int rowAg0 = (rowA[j] >> 5) & 0x1f; + const int rowAg1 = (rowA[k] >> 5) & 0x1f; + const int rowBg0 = (rowB[j] >> 5) & 0x1f; + const int rowBg1 = (rowB[k] >> 5) & 0x1f; + const int rowCg0 = (rowC[j] >> 5) & 0x1f; + const int rowCg1 = (rowC[k] >> 5) & 0x1f; + const int rowDg0 = (rowD[j] >> 5) & 0x1f; + const int rowDg1 = (rowD[k] >> 5) & 0x1f; + const int rowAb0 = (rowA[j] >> 10) & 0x1f; + const int rowAb1 = (rowA[k] >> 10) & 0x1f; + const int rowBb0 = (rowB[j] >> 10) & 0x1f; + const int rowBb1 = (rowB[k] >> 10) & 0x1f; + const int rowCb0 = (rowC[j] >> 10) & 0x1f; + const int rowCb1 = (rowC[k] >> 10) & 0x1f; + const int rowDb0 = (rowD[j] >> 10) & 0x1f; + const int rowDb1 = (rowD[k] >> 10) & 0x1f; + const int rowAa0 = (rowA[j] >> 15) & 0x1; + const int rowAa1 = (rowA[k] >> 15) & 0x1; + const int rowBa0 = (rowB[j] >> 15) & 0x1; + const int rowBa1 = (rowB[k] >> 15) & 0x1; + const int rowCa0 = (rowC[j] >> 15) & 0x1; + const int rowCa1 = (rowC[k] >> 15) & 0x1; + const int rowDa0 = (rowD[j] >> 15) & 0x1; + const int rowDa1 = (rowD[k] >> 15) & 0x1; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1, + rowCa0, rowCa1, rowDa0, rowDa1); + + dst[i] = (a << 15) | (b << 10) | (g << 5) | r; + } + } + else if ((datatype == UBYTE_3_3_2) && (comps == 3)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x3; + const int rowAr1 = rowA[k] & 0x3; + const int rowBr0 = rowB[j] & 0x3; + const int rowBr1 = rowB[k] & 0x3; + const int rowCr0 = rowC[j] & 0x3; + const int rowCr1 = rowC[k] & 0x3; + const int rowDr0 = rowD[j] & 0x3; + const int rowDr1 = rowD[k] & 0x3; + const int rowAg0 = (rowA[j] >> 2) & 0x7; + const int rowAg1 = (rowA[k] >> 2) & 0x7; + const int rowBg0 = (rowB[j] >> 2) & 0x7; + const int rowBg1 = (rowB[k] >> 2) & 0x7; + const int rowCg0 = (rowC[j] >> 2) & 0x7; + const int rowCg1 = (rowC[k] >> 2) & 0x7; + const int rowDg0 = (rowD[j] >> 2) & 0x7; + const int rowDg1 = (rowD[k] >> 2) & 0x7; + const int rowAb0 = (rowA[j] >> 5) & 0x7; + const int rowAb1 = (rowA[k] >> 5) & 0x7; + const int rowBb0 = (rowB[j] >> 5) & 0x7; + const int rowBb1 = (rowB[k] >> 5) & 0x7; + const int rowCb0 = (rowC[j] >> 5) & 0x7; + const int rowCb1 = (rowC[k] >> 5) & 0x7; + const int rowDb0 = (rowD[j] >> 5) & 0x7; + const int rowDb1 = (rowD[k] >> 5) & 0x7; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + dst[i] = (b << 5) | (g << 2) | r; + } + } + else { + debug_printf("bad format in do_row_3D()"); + } +} + + + static void format_to_type_comps(enum pipe_format pformat, enum dtype *datatype, uint *comps) @@ -576,6 +1024,87 @@ reduce_2d(enum pipe_format pformat, static void +reduce_3d(enum pipe_format pformat, + int srcWidth, int srcHeight, int srcDepth, + int srcRowStride, const ubyte *srcPtr, + int dstWidth, int dstHeight, int dstDepth, + int dstRowStride, ubyte *dstPtr) +{ + const int bpt = pf_get_size(pformat); + const int border = 0; + int img, row; + int bytesPerSrcImage, bytesPerDstImage; + int bytesPerSrcRow, bytesPerDstRow; + int srcImageOffset, srcRowOffset; + enum dtype datatype; + uint comps; + + format_to_type_comps(pformat, &datatype, &comps); + + bytesPerSrcImage = srcWidth * srcHeight * bpt; + bytesPerDstImage = dstWidth * dstHeight * bpt; + + bytesPerSrcRow = srcWidth * bpt; + bytesPerDstRow = dstWidth * bpt; + + /* Offset between adjacent src images to be averaged together */ + srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage; + + /* Offset between adjacent src rows to be averaged together */ + srcRowOffset = (srcHeight == dstHeight) ? 0 : srcWidth * bpt; + + /* + * Need to average together up to 8 src pixels for each dest pixel. + * Break that down into 3 operations: + * 1. take two rows from source image and average them together. + * 2. take two rows from next source image and average them together. + * 3. take the two averaged rows and average them for the final dst row. + */ + + /* + _mesa_printf("mip3d %d x %d x %d -> %d x %d x %d\n", + srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth); + */ + + for (img = 0; img < dstDepth; img++) { + /* first source image pointer, skipping border */ + const ubyte *imgSrcA = srcPtr + + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border + + img * (bytesPerSrcImage + srcImageOffset); + /* second source image pointer, skipping border */ + const ubyte *imgSrcB = imgSrcA + srcImageOffset; + /* address of the dest image, skipping border */ + ubyte *imgDst = dstPtr + + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border + + img * bytesPerDstImage; + + /* setup the four source row pointers and the dest row pointer */ + const ubyte *srcImgARowA = imgSrcA; + const ubyte *srcImgARowB = imgSrcA + srcRowOffset; + const ubyte *srcImgBRowA = imgSrcB; + const ubyte *srcImgBRowB = imgSrcB + srcRowOffset; + ubyte *dstImgRow = imgDst; + + for (row = 0; row < dstHeight; row++) { + do_row_3D(datatype, comps, srcWidth, + srcImgARowA, srcImgARowB, + srcImgBRowA, srcImgBRowB, + dstWidth, dstImgRow); + + /* advance to next rows */ + srcImgARowA += bytesPerSrcRow + srcRowOffset; + srcImgARowB += bytesPerSrcRow + srcRowOffset; + srcImgBRowA += bytesPerSrcRow + srcRowOffset; + srcImgBRowB += bytesPerSrcRow + srcRowOffset; + dstImgRow += bytesPerDstRow; + } + } +} + + + + +static void make_1d_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, uint face, uint baseLevel, uint lastLevel) @@ -596,19 +1125,19 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); reduce_1d(pt->format, srcSurf->width, srcMap, dstSurf->width, dstMap); - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); @@ -639,11 +1168,11 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); reduce_2d(pt->format, @@ -652,8 +1181,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstSurf->width, dstSurf->height, dstSurf->stride, dstMap); - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); @@ -666,6 +1195,46 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, uint face, uint baseLevel, uint lastLevel) { + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + uint dstLevel, zslice = 0; + + assert(pt->block.width == 1); + assert(pt->block.height == 1); + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + struct pipe_surface *srcSurf, *dstSurf; + ubyte *srcMap, *dstMap; + + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); + + srcMap = ((ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + + srcSurf->offset); + dstMap = ((ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + + dstSurf->offset); + +#if 0 + reduce_3d(pt->format, + srcSurf->width, srcSurf->height, + srcSurf->stride, srcMap, + dstSurf->width, dstSurf->height, + dstSurf->stride, dstMap); +#else + (void) reduce_3d; +#endif + + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); + + pipe_surface_reference(&srcSurf, NULL); + pipe_surface_reference(&dstSurf, NULL); + } } @@ -920,7 +1489,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, /* init framebuffer state */ memset(&fb, 0, sizeof(fb)); - fb.num_cbufs = 1; + fb.nr_cbufs = 1; /* set min/mag to same filter for faster sw speed */ ctx->sampler.mag_img_filter = filter; diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 2d15932ce3..6da7353e25 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -34,7 +34,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_handle_table.h" diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c index 0bc8de9632..2f83e318e4 100644 --- a/src/gallium/auxiliary/util/u_hash_table.c +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -39,7 +39,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "cso_cache/cso_hash.h" diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c new file mode 100644 index 0000000000..3f70809efd --- /dev/null +++ b/src/gallium/auxiliary/util/u_keymap.c @@ -0,0 +1,309 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Key lookup/associative container. + * + * Like Jose's u_hash_table, based on CSO cache code for now. + * + * Author: Brian Paul + */ + + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "pipe/p_error.h" + +#include "cso_cache/cso_hash.h" + +#include "util/u_memory.h" +#include "util/u_keymap.h" + + +struct keymap +{ + struct cso_hash *cso; + unsigned key_size; + unsigned max_entries; /* XXX not obeyed net */ + unsigned num_entries; + keymap_delete_func delete_func; +}; + + +struct keymap_item +{ + void *key, *value; +}; + + +/** + * This the default key-delete function used when the client doesn't + * provide one. + */ +static void +default_delete_func(const struct keymap *map, + const void *key, void *data, void *user) +{ + FREE((void*) data); +} + + +static INLINE struct keymap_item * +hash_table_item(struct cso_hash_iter iter) +{ + return (struct keymap_item *) cso_hash_iter_data(iter); +} + + +/** + * Return 4-byte hash key for a block of bytes. + */ +static unsigned +hash(const void *key, unsigned keySize) +{ + unsigned i, hash; + + keySize /= 4; /* convert from bytes to uints */ + + hash = 0; + for (i = 0; i < keySize; i++) { + hash ^= (i + 1) * ((const unsigned *) key)[i]; + } + + /*hash = hash ^ (hash >> 11) ^ (hash >> 22);*/ + + return hash; +} + + +/** + * Create a new map. + * \param keySize size of the keys in bytes + * \param maxEntries max number of entries to allow (~0 = infinity) + * \param deleteFunc optional callback to call when entries + * are deleted/replaced + */ +struct keymap * +util_new_keymap(unsigned keySize, unsigned maxEntries, + keymap_delete_func deleteFunc) +{ + struct keymap *map = MALLOC_STRUCT(keymap); + if (!map) + return NULL; + + map->cso = cso_hash_create(); + if (!map->cso) { + FREE(map); + return NULL; + } + + map->max_entries = maxEntries; + map->num_entries = 0; + map->key_size = keySize; + map->delete_func = deleteFunc ? deleteFunc : default_delete_func; + + return map; +} + + +/** + * Delete/free a keymap and all entries. The deleteFunc that was given at + * create time will be called for each entry. + * \param user user-provided pointer passed through to the delete callback + */ +void +util_delete_keymap(struct keymap *map, void *user) +{ + util_keymap_remove_all(map, user); + cso_hash_delete(map->cso); + FREE(map); +} + + +static INLINE struct cso_hash_iter +hash_table_find_iter(const struct keymap *map, const void *key, + unsigned key_hash) +{ + struct cso_hash_iter iter; + struct keymap_item *item; + + iter = cso_hash_find(map->cso, key_hash); + while (!cso_hash_iter_is_null(iter)) { + item = (struct keymap_item *) cso_hash_iter_data(iter); + if (!memcmp(item->key, key, map->key_size)) + break; + iter = cso_hash_iter_next(iter); + } + + return iter; +} + + +static INLINE struct keymap_item * +hash_table_find_item(const struct keymap *map, const void *key, + unsigned key_hash) +{ + struct cso_hash_iter iter = hash_table_find_iter(map, key, key_hash); + if (cso_hash_iter_is_null(iter)) { + return NULL; + } + else { + return hash_table_item(iter); + } +} + + +/** + * Insert a new key + data pointer into the table. + * Note: we create a copy of the key, but not the data! + * If the key is already present in the table, replace the existing + * entry (calling the delete callback on the previous entry). + * If the maximum capacity of the map is reached an old entry + * will be deleted (the delete callback will be called). + */ +boolean +util_keymap_insert(struct keymap *map, const void *key, + const void *data, void *user) +{ + unsigned key_hash; + struct keymap_item *item; + struct cso_hash_iter iter; + + assert(map); + + key_hash = hash(key, map->key_size); + + item = hash_table_find_item(map, key, key_hash); + if (item) { + /* call delete callback for old entry/item */ + map->delete_func(map, item->key, item->value, user); + item->value = (void *) data; + return TRUE; + } + + item = MALLOC_STRUCT(keymap_item); + if (!item) + return FALSE; + + item->key = mem_dup(key, map->key_size); + item->value = (void *) data; + + iter = cso_hash_insert(map->cso, key_hash, item); + if (cso_hash_iter_is_null(iter)) { + FREE(item); + return FALSE; + } + + map->num_entries++; + + return TRUE; +} + + +/** + * Look up a key in the map and return the associated data pointer. + */ +const void * +util_keymap_lookup(const struct keymap *map, const void *key) +{ + unsigned key_hash; + struct keymap_item *item; + + assert(map); + + key_hash = hash(key, map->key_size); + + item = hash_table_find_item(map, key, key_hash); + if (!item) + return NULL; + + return item->value; +} + + +/** + * Remove an entry from the map. + * The delete callback will be called if the given key/entry is found. + * \param user passed to the delete callback as the last param. + */ +void +util_keymap_remove(struct keymap *map, const void *key, void *user) +{ + unsigned key_hash; + struct cso_hash_iter iter; + struct keymap_item *item; + + assert(map); + + key_hash = hash(key, map->key_size); + + iter = hash_table_find_iter(map, key, key_hash); + if (cso_hash_iter_is_null(iter)) + return; + + item = hash_table_item(iter); + assert(item); + map->delete_func(map, item->key, item->value, user); + FREE(item->key); + FREE(item); + + map->num_entries--; + + cso_hash_erase(map->cso, iter); +} + + +/** + * Remove all entries from the map, calling the delete callback for each. + * \param user passed to the delete callback as the last param. + */ +void +util_keymap_remove_all(struct keymap *map, void *user) +{ + struct cso_hash_iter iter; + struct keymap_item *item; + + assert(map); + + iter = cso_hash_first_node(map->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct keymap_item *) + cso_hash_take(map->cso, cso_hash_iter_key(iter)); + map->delete_func(map, item->key, item->value, user); + FREE(item->key); + FREE(item); + iter = cso_hash_first_node(map->cso); + } +} + + +extern void +util_keymap_info(const struct keymap *map) +{ + debug_printf("Keymap %p: %u of max %u entries\n", + (void *) map, map->num_entries, map->max_entries); +} diff --git a/src/gallium/auxiliary/util/u_keymap.h b/src/gallium/auxiliary/util/u_keymap.h new file mode 100644 index 0000000000..8d60a76fc3 --- /dev/null +++ b/src/gallium/auxiliary/util/u_keymap.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_KEYMAP_H +#define U_KEYMAP_H + +#include "pipe/p_compiler.h" + + +/** opaque keymap type */ +struct keymap; + + +/** Delete/callback function type */ +typedef void (*keymap_delete_func)(const struct keymap *map, + const void *key, void *data, + void *user); + + +extern struct keymap * +util_new_keymap(unsigned keySize, unsigned maxEntries, + keymap_delete_func deleteFunc); + +extern void +util_delete_keymap(struct keymap *map, void *user); + +extern boolean +util_keymap_insert(struct keymap *map, const void *key, + const void *data, void *user); + +extern const void * +util_keymap_lookup(const struct keymap *map, const void *key); + +extern void +util_keymap_remove(struct keymap *map, const void *key, void *user); + +extern void +util_keymap_remove_all(struct keymap *map, void *user); + +extern void +util_keymap_info(const struct keymap *map); + + +#endif /* U_KEYMAP_H */ diff --git a/src/gallium/auxiliary/util/u_linear.c b/src/gallium/auxiliary/util/u_linear.c new file mode 100644 index 0000000000..6be365e53b --- /dev/null +++ b/src/gallium/auxiliary/util/u_linear.c @@ -0,0 +1,70 @@ + +#include "util/u_debug.h" +#include "u_linear.h" + +void +pipe_linear_to_tile(size_t src_stride, const void *src_ptr, + struct pipe_tile_info *t, void *dst_ptr) +{ + int x, y, z; + char *ptr; + size_t bytes = t->cols * t->block.size; + char *dst_ptr2 = (char *) dst_ptr; + + assert(pipe_linear_check_tile(t)); + + /* lets write lineary to the tiled buffer */ + for (y = 0; y < t->tiles_y; y++) { + for (x = 0; x < t->tiles_x; x++) { + /* this inner loop could be replace with SSE magic */ + ptr = (char*)src_ptr + src_stride * t->rows * y + bytes * x; + for (z = 0; z < t->rows; z++) { + memcpy(dst_ptr2, ptr, bytes); + dst_ptr2 += bytes; + ptr += src_stride; + } + } + } +} + +void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, + size_t dst_stride, void *dst_ptr) +{ + int x, y, z; + char *ptr; + size_t bytes = t->cols * t->block.size; + const char *src_ptr2 = (const char *) src_ptr; + + /* lets read lineary from the tiled buffer */ + for (y = 0; y < t->tiles_y; y++) { + for (x = 0; x < t->tiles_x; x++) { + /* this inner loop could be replace with SSE magic */ + ptr = (char*)dst_ptr + dst_stride * t->rows * y + bytes * x; + for (z = 0; z < t->rows; z++) { + memcpy(ptr, src_ptr2, bytes); + src_ptr2 += bytes; + ptr += dst_stride; + } + } + } +} + +void +pipe_linear_fill_info(struct pipe_tile_info *t, + const struct pipe_format_block *block, + unsigned tile_width, unsigned tile_height, + unsigned tiles_x, unsigned tiles_y) +{ + t->block = *block; + + t->tile.width = tile_width; + t->tile.height = tile_height; + t->cols = t->tile.width / t->block.width; + t->rows = t->tile.height / t->block.height; + t->tile.size = t->cols * t->rows * t->block.size; + + t->tiles_x = tiles_x; + t->tiles_y = tiles_y; + t->stride = t->cols * t->tiles_x * t->block.size; + t->size = t->tiles_x * t->tiles_y * t->tile.size; +} diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h new file mode 100644 index 0000000000..1589f029bc --- /dev/null +++ b/src/gallium/auxiliary/util/u_linear.h @@ -0,0 +1,61 @@ + +#ifndef U_LINEAR_H +#define U_LINEAR_H + +#include "pipe/p_format.h" + +struct pipe_tile_info +{ + unsigned size; + unsigned stride; + + /* The number of tiles */ + unsigned tiles_x; + unsigned tiles_y; + + /* size of each tile expressed in blocks */ + unsigned cols; + unsigned rows; + + /* Describe the tile in pixels */ + struct pipe_format_block tile; + + /* Describe each block within the tile */ + struct pipe_format_block block; +}; + +void pipe_linear_to_tile(size_t src_stride, const void *src_ptr, + struct pipe_tile_info *t, void *dst_ptr); + +void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, + size_t dst_stride, void *dst_ptr); + +/** + * Convenience function to fillout a pipe_tile_info struct. + * @t info to fill out. + * @block block info about pixel layout + * @tile_width the width of the tile in pixels + * @tile_height the height of the tile in pixels + * @tiles_x number of tiles in x axis + * @tiles_y number of tiles in y axis + */ +void pipe_linear_fill_info(struct pipe_tile_info *t, + const struct pipe_format_block *block, + unsigned tile_width, unsigned tile_height, + unsigned tiles_x, unsigned tiles_y); + +static INLINE boolean pipe_linear_check_tile(const struct pipe_tile_info *t) +{ + if (t->tile.size != t->block.size * t->cols * t->rows) + return FALSE; + + if (t->stride != t->block.size * t->cols * t->tiles_x) + return FALSE; + + if (t->size < t->stride * t->rows * t->tiles_y) + return FALSE; + + return TRUE; +} + +#endif /* U_LINEAR_H */ diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 1ae3234423..1ecde7a912 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -40,7 +40,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #ifdef __cplusplus @@ -68,7 +68,7 @@ __inline double ceil(double val) return ceil_val; } -#ifndef PIPE_SUBSYSTEM_WINDOWS_CE +#ifndef PIPE_SUBSYSTEM_WINDOWS_CE_OGL __inline double floor(double val) { double floor_val; @@ -341,6 +341,10 @@ unsigned ffs( unsigned u ) } #endif +#ifdef __MINGW32__ +#define ffs __builtin_ffs +#endif + /** * Return float bits. diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index 79e34e185f..ceb3a1cb61 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -36,7 +36,7 @@ #include "util/u_pointer.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #ifdef __cplusplus @@ -56,7 +56,7 @@ extern "C" { /* memory debugging */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #define MALLOC( _size ) \ debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) @@ -151,6 +151,8 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) +#define CALLOC_VARIANT_LENGTH_STRUCT(T,more_size) ((struct T *) CALLOC(1, sizeof(struct T) + more_size)) + /** * Return memory on given byte alignment diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c index 01dd67c810..151a480d34 100644 --- a/src/gallium/auxiliary/util/u_mm.c +++ b/src/gallium/auxiliary/util/u_mm.c @@ -24,14 +24,14 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_mm.h" void -mmDumpMemInfo(const struct mem_block *heap) +u_mmDumpMemInfo(const struct mem_block *heap) { debug_printf("Memory heap %p:\n", (void *)heap); if (heap == 0) { @@ -58,7 +58,7 @@ mmDumpMemInfo(const struct mem_block *heap) } struct mem_block * -mmInit(int ofs, int size) +u_mmInit(int ofs, int size) { struct mem_block *heap, *block; @@ -165,7 +165,7 @@ SliceBlock(struct mem_block *p, struct mem_block * -mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) +u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) { struct mem_block *p; const int mask = (1 << align2)-1; @@ -202,7 +202,7 @@ mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) struct mem_block * -mmFindBlock(struct mem_block *heap, int start) +u_mmFindBlock(struct mem_block *heap, int start) { struct mem_block *p; @@ -241,7 +241,7 @@ Join2Blocks(struct mem_block *p) } int -mmFreeMem(struct mem_block *b) +u_mmFreeMem(struct mem_block *b) { if (!b) return 0; @@ -270,7 +270,7 @@ mmFreeMem(struct mem_block *b) void -mmDestroy(struct mem_block *heap) +u_mmDestroy(struct mem_block *heap) { struct mem_block *p; diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h index b226b101cb..ce20e48763 100644 --- a/src/gallium/auxiliary/util/u_mm.h +++ b/src/gallium/auxiliary/util/u_mm.h @@ -49,7 +49,7 @@ struct mem_block { * input: total size in bytes * return: a heap pointer if OK, NULL if error */ -extern struct mem_block *mmInit(int ofs, int size); +extern struct mem_block *u_mmInit(int ofs, int size); /** * Allocate 'size' bytes with 2^align2 bytes alignment, @@ -61,7 +61,7 @@ extern struct mem_block *mmInit(int ofs, int size); * startSearch = linear offset from start of heap to begin search * return: pointer to the allocated block, 0 if error */ -extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2, +extern struct mem_block *u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch); /** @@ -69,23 +69,23 @@ extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2 * input: pointer to a block * return: 0 if OK, -1 if error */ -extern int mmFreeMem(struct mem_block *b); +extern int u_mmFreeMem(struct mem_block *b); /** * Free block starts at offset * input: pointer to a heap, start offset * return: pointer to a block */ -extern struct mem_block *mmFindBlock(struct mem_block *heap, int start); +extern struct mem_block *u_mmFindBlock(struct mem_block *heap, int start); /** * destroy MM */ -extern void mmDestroy(struct mem_block *mmInit); +extern void u_mmDestroy(struct mem_block *mmInit); /** * For debuging purpose. */ -extern void mmDumpMemInfo(const struct mem_block *mmInit); +extern void u_mmDumpMemInfo(const struct mem_block *mmInit); #endif diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index e45e84ded2..d7c3995dbf 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -119,4 +119,20 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) } +static INLINE boolean u_reduced_prim( unsigned pipe_prim ) +{ + switch (pipe_prim) { + case PIPE_PRIM_POINTS: + return PIPE_PRIM_POINTS; + + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + return PIPE_PRIM_LINES; + + default: + return PIPE_PRIM_TRIANGLES; + } +} + #endif diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c new file mode 100644 index 0000000000..089bbbc48a --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_screen.c @@ -0,0 +1,143 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "u_simple_screen.h" + +#include "pipe/p_screen.h" +#include "pipe/internal/p_winsys_screen.h" + + +static struct pipe_buffer * +pass_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + return screen->winsys->buffer_create(screen->winsys, + alignment, usage, size); +} + +static struct pipe_buffer * +pass_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + return screen->winsys->user_buffer_create(screen->winsys, + ptr, bytes); +} + +static struct pipe_buffer * +pass_surface_buffer_create(struct pipe_screen *screen, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride) +{ + return screen->winsys->surface_buffer_create(screen->winsys, + width, height, + format, usage, stride); +} + +static void * +pass_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned usage) +{ + return screen->winsys->buffer_map(screen->winsys, + buf, usage); +} + +static void +pass_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + screen->winsys->buffer_unmap(screen->winsys, buf); +} + +static void +pass_buffer_destroy(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + screen->winsys->buffer_destroy(screen->winsys, buf); +} + + +static void +pass_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_surface *surf, + void *context_private) +{ + screen->winsys->flush_frontbuffer(screen->winsys, + surf, context_private); +} + +static void +pass_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + screen->winsys->fence_reference(screen->winsys, + ptr, fence); +} + +static int +pass_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return screen->winsys->fence_signalled(screen->winsys, + fence, flag); +} + +static int +pass_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return screen->winsys->fence_finish(screen->winsys, + fence, flag); +} + +void u_simple_screen_init(struct pipe_screen *screen) +{ + screen->buffer_create = pass_buffer_create; + screen->user_buffer_create = pass_user_buffer_create; + screen->surface_buffer_create = pass_surface_buffer_create; + + screen->buffer_map = pass_buffer_map; + screen->buffer_unmap = pass_buffer_unmap; + screen->buffer_destroy = pass_buffer_destroy; + screen->flush_frontbuffer = pass_flush_frontbuffer; + screen->fence_reference = pass_fence_reference; + screen->fence_signalled = pass_fence_signalled; + screen->fence_finish = pass_fence_finish; +} + +const char* u_simple_screen_winsys_name(struct pipe_screen *screen) +{ + return screen->winsys->get_name(screen->winsys); +} diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h new file mode 100644 index 0000000000..6612a8a7c0 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_screen.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_SIMPLE_SCREEN_H +#define U_SIMPLE_SCREEN_H + +struct pipe_screen; +struct pipe_winsys; + +/** + * The following function initializes a simple passthrough screen. + * + * All the relevant screen function pointers will forwarded to the + * winsys. + */ +void u_simple_screen_init(struct pipe_screen *screen); + +/** + * Returns the name of the winsys associated with this screen. + */ +const char* u_simple_screen_winsys_name(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index f06d13c2c4..3cd2d52c64 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -34,10 +34,10 @@ #include "pipe/p_context.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/p_screen.h" #include "pipe/p_shader_tokens.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h new file mode 100644 index 0000000000..e2a8491e62 --- /dev/null +++ b/src/gallium/auxiliary/util/u_sse.h @@ -0,0 +1,77 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * SSE intrinsics portability header. + * + * Although the SSE intrinsics are support by all modern x86 and x86-64 + * compilers, there are some intrisincs missing in some implementations + * (especially older MSVC versions). This header abstracts that away. + */ + +#ifndef U_SSE_H_ +#define U_SSE_H_ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_SSE) + +#include <xmmintrin.h> +#include <emmintrin.h> + + +/* MSVC before VC8 does not support the _mm_castxxx_yyy */ +#if defined(_MSC_VER) && _MSC_VER < 1500 + +union __declspec(align(16)) m128_types { + __m128 m128; + __m128i m128i; + __m128d m128d; +}; + +static __inline __m128 +_mm_castsi128_ps(__m128i a) +{ + union m128_types u; + u.m128i = a; + return u.m128; +} + +static __inline __m128i +_mm_castps_si128(__m128 a) +{ + union m128_types u; + u.m128 = a; + return u.m128i; +} + +#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ + +#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ + +#endif /* U_SSE_H_ */ diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 853c503f4f..32f6b072a0 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -460,7 +460,7 @@ l8_put_tile_rgba(ubyte *dst, for (j = 0; j < w; j++, pRow += 4) { unsigned r; r = float_to_ubyte(pRow[0]); - *dst++ = r; + *dst++ = (ubyte) r; } p += src_stride; } @@ -504,7 +504,7 @@ a8_put_tile_rgba(ubyte *dst, for (j = 0; j < w; j++, pRow += 4) { unsigned a; a = float_to_ubyte(pRow[3]); - *dst++ = a; + *dst++ = (ubyte) a; } p += src_stride; } @@ -634,7 +634,7 @@ i8_put_tile_rgba(ubyte *dst, for (j = 0; j < w; j++, pRow += 4) { unsigned r; r = float_to_ubyte(pRow[0]); - *dst++ = r; + *dst++ = (ubyte) r; } p += src_stride; } @@ -769,6 +769,32 @@ z24s8_get_tile_rgba(const unsigned *src, } +/*** PIPE_FORMAT_Z32_FLOAT ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z32f_get_tile_rgba(const float *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src++; + } + p += dst_stride; + } +} + + /*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ /** @@ -913,6 +939,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z24S8_UNORM: z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_Z32_FLOAT: + z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_YCBCR: ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE); break; diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index f84514165a..dde2c74fa8 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -200,7 +200,7 @@ util_time_timeout(const struct util_time *start, } -#if defined(PIPE_SUBSYSYEM_WINDOWS_DISPLAY) +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) void util_time_sleep(unsigned usecs) { LONGLONG start, curr, end; diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c index 8beb3b4c88..f237e12d73 100644 --- a/src/gallium/auxiliary/util/u_timed_winsys.c +++ b/src/gallium/auxiliary/util/u_timed_winsys.c @@ -29,7 +29,7 @@ * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "u_timed_winsys.h" #include "util/u_memory.h" #include "util/u_time.h" @@ -121,7 +121,8 @@ timed_buffer_create(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - struct pipe_buffer *buf = backend->buffer_create( backend, alignment, usage, size ); + struct pipe_buffer *buf = + backend->buffer_create( backend, alignment, usage, size ); time_finish(winsys, start, 0, __FUNCTION__); @@ -205,34 +206,18 @@ timed_flush_frontbuffer( struct pipe_winsys *winsys, -static struct pipe_surface * -timed_surface_alloc(struct pipe_winsys *winsys) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - uint64_t start = time_start(); - - struct pipe_surface *surf = backend->surface_alloc( backend ); - - time_finish(winsys, start, 6, __FUNCTION__); - - return surf; -} - - - -static int -timed_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +timed_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *stride) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - int ret = backend->surface_alloc_storage( backend, surf, width, height, - format, flags, tex_usage ); + struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height, + format, usage, stride ); time_finish(winsys, start, 7, __FUNCTION__); @@ -240,19 +225,6 @@ timed_surface_alloc_storage(struct pipe_winsys *winsys, } -static void -timed_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - uint64_t start = time_start(); - - backend->surface_release( backend, s ); - - time_finish(winsys, start, 8, __FUNCTION__); -} - - - static const char * timed_get_name( struct pipe_winsys *winsys ) { @@ -329,11 +301,9 @@ struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend ) ws->base.buffer_unmap = timed_buffer_unmap; ws->base.buffer_destroy = timed_buffer_destroy; ws->base.buffer_create = timed_buffer_create; + ws->base.surface_buffer_create = timed_surface_buffer_create; ws->base.flush_frontbuffer = timed_flush_frontbuffer; ws->base.get_name = timed_get_name; - ws->base.surface_alloc = timed_surface_alloc; - ws->base.surface_alloc_storage = timed_surface_alloc_storage; - ws->base.surface_release = timed_surface_release; ws->base.fence_reference = timed_fence_reference; ws->base.fence_signalled = timed_fence_signalled; ws->base.fence_finish = timed_fence_finish; |