summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.c2
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c2
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_hash.c8
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_hash.h6
-rw-r--r--src/gallium/auxiliary/draw/Makefile1
-rw-r--r--src/gallium/auxiliary/draw/SConscript1
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c15
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h8
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c98
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c69
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c76
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c105
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_vbuf.h21
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c7
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c42
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c15
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c244
-rw-r--r--src/gallium/auxiliary/gallivm/Makefile4
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm.cpp2
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_builtins.cpp2
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_cpu.cpp13
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_p.h4
-rw-r--r--src/gallium/auxiliary/gallivm/instructions.cpp2
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.cpp254
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.h1
-rw-r--r--src/gallium/auxiliary/gallivm/storage.cpp2
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.cpp99
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.h17
-rw-r--r--src/gallium/auxiliary/gallivm/tgsitollvm.cpp23
-rw-r--r--src/gallium/auxiliary/indices/Makefile16
-rw-r--r--src/gallium/auxiliary/indices/SConscript17
-rw-r--r--src/gallium/auxiliary/indices/u_indices.c253
-rw-r--r--src/gallium/auxiliary/indices/u_indices.h83
-rw-r--r--src/gallium/auxiliary/indices/u_indices_gen.c5129
-rw-r--r--src/gallium/auxiliary/indices/u_indices_gen.py319
-rw-r--r--src/gallium/auxiliary/indices/u_indices_priv.h43
-rw-r--r--src/gallium/auxiliary/pipebuffer/Makefile4
-rw-r--r--src/gallium/auxiliary/pipebuffer/SConscript2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer.h67
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c183
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h51
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c22
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr.h21
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c26
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c29
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c11
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c43
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c303
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c27
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c23
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_validate.c86
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_validate.h10
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_winsys.c170
-rw-r--r--src/gallium/auxiliary/rtasm/Makefile1
-rw-r--r--src/gallium/auxiliary/rtasm/SConscript1
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_cpu.c2
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_execmem.c19
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc.c1077
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc.h342
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c725
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h513
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c68
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h11
-rw-r--r--src/gallium/auxiliary/tgsi/Makefile1
-rw-r--r--src/gallium/auxiliary/tgsi/SConscript1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c46
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump_c.c7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c120
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h21
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_iterate.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c60
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c1363
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.h51
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c17
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c374
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_transform.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_util.c2
-rw-r--r--src/gallium/auxiliary/util/Makefile7
-rw-r--r--src/gallium/auxiliary/util/SConscript13
-rw-r--r--src/gallium/auxiliary/util/u_blit.c7
-rw-r--r--src/gallium/auxiliary/util/u_cache.c2
-rw-r--r--src/gallium/auxiliary/util/u_debug.c (renamed from src/gallium/auxiliary/util/p_debug.c)92
-rw-r--r--src/gallium/auxiliary/util/u_debug.h361
-rw-r--r--src/gallium/auxiliary/util/u_debug_memory.c (renamed from src/gallium/auxiliary/util/p_debug_mem.c)2
-rw-r--r--src/gallium/auxiliary/util/u_debug_profile.c (renamed from src/gallium/auxiliary/util/p_debug_prof.c)0
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.c3
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c611
-rw-r--r--src/gallium/auxiliary/util/u_handle_table.c2
-rw-r--r--src/gallium/auxiliary/util/u_hash_table.c2
-rw-r--r--src/gallium/auxiliary/util/u_keymap.c309
-rw-r--r--src/gallium/auxiliary/util/u_keymap.h68
-rw-r--r--src/gallium/auxiliary/util/u_linear.c70
-rw-r--r--src/gallium/auxiliary/util/u_linear.h61
-rw-r--r--src/gallium/auxiliary/util/u_math.h8
-rw-r--r--src/gallium/auxiliary/util/u_memory.h6
-rw-r--r--src/gallium/auxiliary/util/u_mm.c14
-rw-r--r--src/gallium/auxiliary/util/u_mm.h12
-rw-r--r--src/gallium/auxiliary/util/u_prim.h16
-rw-r--r--src/gallium/auxiliary/util/u_simple_screen.c143
-rw-r--r--src/gallium/auxiliary/util/u_simple_screen.h47
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.c4
-rw-r--r--src/gallium/auxiliary/util/u_sse.h77
-rw-r--r--src/gallium/auxiliary/util/u_tile.c35
-rw-r--r--src/gallium/auxiliary/util/u_time.c2
-rw-r--r--src/gallium/auxiliary/util/u_timed_winsys.c50
120 files changed, 13643 insertions, 1356 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index 6b1754ea00..0bc77a5728 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -28,7 +28,7 @@
/* Authors: Zack Rusin <zack@tungstengraphics.com>
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 68508f24de..a9157aad71 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -783,7 +783,7 @@ copy_framebuffer_state(struct pipe_framebuffer_state *dst,
dst->width = src->width;
dst->height = src->height;
- dst->num_cbufs = src->num_cbufs;
+ dst->nr_cbufs = src->nr_cbufs;
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c
index 7f0044c5a7..288cef7b6f 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.c
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.c
@@ -30,7 +30,7 @@
* Zack Rusin <zack@tungstengraphics.com>
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "cso_hash.h"
@@ -431,3 +431,9 @@ struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter
--hash->data.d->size;
return ret;
}
+
+boolean cso_hash_contains(struct cso_hash *hash, unsigned key)
+{
+ struct cso_node **node = cso_hash_find_node(hash, key);
+ return (*node != hash->data.e);
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h
index 85f3e276c6..5891c325fa 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.h
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.h
@@ -44,6 +44,7 @@
#ifndef CSO_HASH_H
#define CSO_HASH_H
+#include "pipe/p_compiler.h"
#ifdef __cplusplus
extern "C" {
@@ -95,6 +96,11 @@ struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash);
*/
struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key);
+/**
+ * Returns true if a value with the given key exists in the hash
+ */
+boolean cso_hash_contains(struct cso_hash *hash, unsigned key);
+
int cso_hash_iter_is_null(struct cso_hash_iter iter);
unsigned cso_hash_iter_key(struct cso_hash_iter iter);
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index f2e36a89e9..bdbf5a08ed 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -40,6 +40,7 @@ C_SOURCES = \
draw_vs_aos_machine.c \
draw_vs_exec.c \
draw_vs_llvm.c \
+ draw_vs_ppc.c \
draw_vs_sse.c
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 544a04918b..5f05aa324a 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -38,6 +38,7 @@ draw = env.ConvenienceLibrary(
'draw_vs_aos_machine.c',
'draw_vs_exec.c',
'draw_vs_llvm.c',
+ 'draw_vs_ppc.c',
'draw_vs_sse.c',
'draw_vs_varient.c'
])
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index fab8fc95fc..7bd4a2e221 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -343,6 +343,21 @@ draw_num_vs_outputs(const struct draw_context *draw)
}
+/**
+ * Provide TGSI sampler objects for vertex shaders that use texture fetches.
+ * This might only be used by software drivers for the time being.
+ */
+void
+draw_texture_samplers(struct draw_context *draw,
+ uint num_samplers,
+ struct tgsi_sampler **samplers)
+{
+ draw->vs.num_samplers = num_samplers;
+ draw->vs.samplers = samplers;
+}
+
+
+
void draw_set_render( struct draw_context *draw,
struct vbuf_render *render )
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index a29bb01d81..d529e4e9a2 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -45,7 +45,7 @@ struct pipe_context;
struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
-
+struct tgsi_sampler;
struct draw_context *draw_create( void );
@@ -92,6 +92,12 @@ uint
draw_num_vs_outputs(const struct draw_context *draw);
+void
+draw_texture_samplers(struct draw_context *draw,
+ uint num_samplers,
+ struct tgsi_sampler **samplers);
+
+
/*
* Vertex shader functions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index b764d9c518..a0f9716dac 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
struct tgsi_full_immediate immed;
uint size = 4;
immed = tgsi_default_full_immediate();
- immed.Immediate.Size = 1 + size; /* one for the token itself */
+ immed.Immediate.NrTokens = 1 + size; /* one for the token itself */
immed.u.Pointer = (void *) value;
ctx->emit_immediate(ctx, &immed);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 9825e116c3..12325d30d6 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -29,12 +29,12 @@
* \file
* Vertex buffer drawing stage.
*
- * \author José Fonseca <jrfonsec@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonsec@tungstengraphics.com>
* \author Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -93,7 +93,6 @@ vbuf_stage( struct draw_stage *stage )
}
-static void vbuf_flush_indices( struct vbuf_stage *vbuf );
static void vbuf_flush_vertices( struct vbuf_stage *vbuf );
static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
@@ -109,13 +108,12 @@ overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
static INLINE void
check_space( struct vbuf_stage *vbuf, unsigned nr )
{
- if (vbuf->nr_vertices + nr > vbuf->max_vertices ) {
- vbuf_flush_vertices(vbuf);
- vbuf_alloc_vertices(vbuf);
+ if (vbuf->nr_vertices + nr > vbuf->max_vertices ||
+ vbuf->nr_indices + nr > vbuf->max_indices)
+ {
+ vbuf_flush_vertices( vbuf );
+ vbuf_alloc_vertices( vbuf );
}
-
- if (vbuf->nr_indices + nr > vbuf->max_indices )
- vbuf_flush_indices(vbuf);
}
@@ -202,7 +200,7 @@ vbuf_point( struct draw_stage *stage,
* will be flushed if needed and a new one allocated.
*/
static void
-vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
+vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
{
struct translate_key hw_key;
unsigned dst_offset;
@@ -217,11 +215,7 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
* state change.
*/
vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
-
- if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) {
- vbuf_flush_vertices(vbuf);
- vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
- }
+ vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
/* Translate from pipeline vertices to hw vertices.
*/
@@ -294,8 +288,8 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
/* Allocate new buffer?
*/
- if (!vbuf->vertices)
- vbuf_alloc_vertices(vbuf);
+ assert(vbuf->vertices == NULL);
+ vbuf_alloc_vertices(vbuf);
}
@@ -305,9 +299,9 @@ vbuf_first_tri( struct draw_stage *stage,
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices( vbuf );
+ vbuf_start_prim(vbuf, PIPE_PRIM_TRIANGLES);
stage->tri = vbuf_tri;
- vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES);
stage->tri( stage, prim );
}
@@ -318,9 +312,9 @@ vbuf_first_line( struct draw_stage *stage,
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices( vbuf );
+ vbuf_start_prim(vbuf, PIPE_PRIM_LINES);
stage->line = vbuf_line;
- vbuf_set_prim(vbuf, PIPE_PRIM_LINES);
stage->line( stage, prim );
}
@@ -331,53 +325,42 @@ vbuf_first_point( struct draw_stage *stage,
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices(vbuf);
+ vbuf_start_prim(vbuf, PIPE_PRIM_POINTS);
stage->point = vbuf_point;
- vbuf_set_prim(vbuf, PIPE_PRIM_POINTS);
stage->point( stage, prim );
}
-static void
-vbuf_flush_indices( struct vbuf_stage *vbuf )
-{
- if(!vbuf->nr_indices)
- return;
-
- assert((uint) (vbuf->vertex_ptr - vbuf->vertices) ==
- vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned));
-
- vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices);
-
- vbuf->nr_indices = 0;
-}
-
/**
* Flush existing vertex buffer and allocate a new one.
- *
- * XXX: We separate flush-on-index-full and flush-on-vb-full, but may
- * raise issues uploading vertices if the hardware wants to flush when
- * we flush.
*/
static void
vbuf_flush_vertices( struct vbuf_stage *vbuf )
{
- if(vbuf->vertices) {
- vbuf_flush_indices(vbuf);
-
+ if(vbuf->vertices) {
+
+ vbuf->render->unmap_vertices( vbuf->render, 0, vbuf->nr_vertices - 1 );
+
+ if (vbuf->nr_indices)
+ {
+ vbuf->render->draw(vbuf->render,
+ vbuf->indices,
+ vbuf->nr_indices );
+
+ vbuf->nr_indices = 0;
+ }
+
/* Reset temporary vertices ids */
if(vbuf->nr_vertices)
draw_reset_vertex_ids( vbuf->stage.draw );
/* Free the vertex buffer */
- vbuf->render->release_vertices(vbuf->render,
- vbuf->vertices,
- vbuf->vertex_size,
- vbuf->nr_vertices);
+ vbuf->render->release_vertices( vbuf->render );
+
vbuf->max_vertices = vbuf->nr_vertices = 0;
vbuf->vertex_ptr = vbuf->vertices = NULL;
-
}
}
@@ -394,14 +377,20 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* even number */
vbuf->max_vertices = vbuf->max_vertices & ~1;
+ if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
+ vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
+
/* Must always succeed -- driver gives us a
* 'max_vertex_buffer_bytes' which it guarantees it can allocate,
* and it will flush itself if necessary to do so. If this does
* fail, we are basically without usable hardware.
*/
- vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render,
- (ushort) vbuf->vertex_size,
- (ushort) vbuf->max_vertices);
+ vbuf->render->allocate_vertices(vbuf->render,
+ (ushort) vbuf->vertex_size,
+ (ushort) vbuf->max_vertices);
+
+ vbuf->vertices = (uint *) vbuf->render->map_vertices( vbuf->render );
+
vbuf->vertex_ptr = vbuf->vertices;
}
@@ -412,14 +401,11 @@ vbuf_flush( struct draw_stage *stage, unsigned flags )
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- vbuf_flush_indices( vbuf );
+ vbuf_flush_vertices( vbuf );
stage->point = vbuf_first_point;
stage->line = vbuf_first_line;
stage->tri = vbuf_first_tri;
-
- if (flags & DRAW_FLUSH_BACKEND)
- vbuf_flush_vertices( vbuf );
}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index a16b45d340..81e4eae401 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -187,6 +187,9 @@ struct draw_context
/** TGSI program interpreter runtime state */
struct tgsi_exec_machine machine;
+ uint num_samplers;
+ struct tgsi_sampler **samplers;
+
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
*/
struct gallivm_cpu_engine *engine;
@@ -198,7 +201,7 @@ struct draw_context
const float (*aligned_constants)[4];
- float (*aligned_constant_storage)[4];
+ const float (*aligned_constant_storage)[4];
unsigned const_storage_size;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 18f24e5980..4e5ffa0930 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -228,7 +228,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
for (j = 0; j < draw->pt.nr_vertex_elements; j++) {
uint buf = draw->pt.vertex_element[j].vertex_buffer_index;
ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf];
- ptr += draw->pt.vertex_buffer[buf].pitch * ii;
+ ptr += draw->pt.vertex_buffer[buf].stride * ii;
ptr += draw->pt.vertex_element[j].src_offset;
debug_printf(" Attr %u: ", j);
@@ -301,8 +301,8 @@ draw_arrays(struct draw_context *draw, unsigned prim,
}
debug_printf("Buffers:\n");
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
- debug_printf(" pitch=%u offset=%u ptr=%p\n",
- draw->pt.vertex_buffer[i].pitch,
+ debug_printf(" stride=%u offset=%u ptr=%p\n",
+ draw->pt.vertex_buffer[i].stride,
draw->pt.vertex_buffer[i].buffer_offset,
draw->pt.user.vbuffer[i]);
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index c02f229110..aecaeee5b9 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -173,9 +173,7 @@ void draw_pt_emit( struct pt_emit *emit,
void draw_pt_emit_linear( struct pt_emit *emit,
const float (*vertex_data)[4],
- unsigned vertex_count,
unsigned stride,
- unsigned start,
unsigned count );
void draw_pt_emit_destroy( struct pt_emit *emit );
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index d520b05869..064e16c295 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -165,6 +165,14 @@ void draw_pt_emit( struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ if (vertex_count == 0)
+ return;
+
+ if (vertex_count >= UNDEFINED_VERTEX_ID) {
+ assert(0);
+ return;
+ }
+
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -173,9 +181,11 @@ void draw_pt_emit( struct pt_emit *emit,
return;
}
- hw_verts = render->allocate_vertices(render,
- (ushort)translate->key.output_stride,
- (ushort)vertex_count);
+ render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)vertex_count);
+
+ hw_verts = render->map_vertices( render );
if (!hw_verts) {
assert(0);
return;
@@ -196,22 +206,21 @@ void draw_pt_emit( struct pt_emit *emit,
vertex_count,
hw_verts );
+ render->unmap_vertices( render,
+ 0,
+ vertex_count - 1 );
+
render->draw(render,
elts,
count);
- render->release_vertices(render,
- hw_verts,
- translate->key.output_stride,
- vertex_count);
+ render->release_vertices(render);
}
void draw_pt_emit_linear(struct pt_emit *emit,
const float (*vertex_data)[4],
- unsigned vertex_count,
unsigned stride,
- unsigned start,
unsigned count)
{
struct draw_context *draw = emit->draw;
@@ -226,21 +235,23 @@ void draw_pt_emit_linear(struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
- if (!draw->render->set_primitive(draw->render, emit->prim)) {
- assert(0);
- return;
- }
+ if (!draw->render->set_primitive(draw->render, emit->prim))
+ goto fail;
- hw_verts = render->allocate_vertices(render,
- (ushort)translate->key.output_stride,
- (ushort)count);
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (!render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)count))
+ goto fail;
+
+ hw_verts = render->map_vertices( render );
+ if (!hw_verts)
+ goto fail;
translate->set_buffer(translate, 0,
vertex_data, stride);
@@ -251,12 +262,12 @@ void draw_pt_emit_linear(struct pt_emit *emit,
translate->run(translate,
0,
- vertex_count,
+ count,
hw_verts);
if (0) {
unsigned i;
- for (i = 0; i < vertex_count; i++) {
+ for (i = 0; i < count; i++) {
debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
draw_dump_emitted_vertex( emit->vinfo,
(const uint8_t *)hw_verts +
@@ -264,13 +275,17 @@ void draw_pt_emit_linear(struct pt_emit *emit,
}
}
+ render->unmap_vertices( render, 0, count - 1 );
+
+ render->draw_arrays(render, 0, count);
+
+ render->release_vertices(render);
- render->draw_arrays(render, start, count);
+ return;
- render->release_vertices(render,
- hw_verts,
- translate->key.output_stride,
- vertex_count);
+fail:
+ assert(0);
+ return;
}
struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 6377f896fb..058caf7dcc 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -144,7 +144,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
translate->run_elts( translate,
@@ -180,7 +180,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
translate->run( translate,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 3966ad48ba..6b7d02a19b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -195,7 +195,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
i,
((char *)draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
@@ -229,9 +229,16 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->translate->key.output_stride,
- (ushort)fetch_count );
+ if (fetch_count >= UNDEFINED_VERTEX_ID) {
+ assert(0);
+ return;
+ }
+
+ draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)fetch_count );
+
+ hw_verts = draw->render->map_vertices( draw->render );
if (!hw_verts) {
assert(0);
return;
@@ -254,6 +261,10 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
}
}
+ draw->render->unmap_vertices( draw->render,
+ 0,
+ (ushort)(fetch_count - 1) );
+
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
@@ -263,10 +274,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
/* Done -- that was easy, wasn't it:
*/
- draw->render->release_vertices( draw->render,
- hw_verts,
- feme->translate->key.output_stride,
- fetch_count );
+ draw->render->release_vertices( draw->render );
}
@@ -283,13 +291,17 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->translate->key.output_stride,
- (ushort)count );
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
/* Single routine to fetch vertices and emit HW verts.
*/
@@ -307,20 +319,21 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
}
}
+ draw->render->unmap_vertices( draw->render, 0, count - 1 );
+
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
- draw->render->draw_arrays( draw->render,
- 0, /*start*/
- count );
+ draw->render->draw_arrays( draw->render, 0, count );
/* Done -- that was easy, wasn't it:
*/
- draw->render->release_vertices( draw->render,
- hw_verts,
- feme->translate->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
+ return;
+fail:
+ assert(0);
+ return;
}
@@ -338,9 +351,15 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->translate->key.output_stride,
- (ushort)count );
+ if (count >= UNDEFINED_VERTEX_ID)
+ return FALSE;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)count ))
+ return FALSE;
+
+ hw_verts = draw->render->map_vertices( draw->render );
if (!hw_verts)
return FALSE;
@@ -351,6 +370,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
count,
hw_verts );
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
@@ -360,10 +381,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
/* Done -- that was easy, wasn't it:
*/
- draw->render->release_vertices( draw->render,
- hw_verts,
- feme->translate->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index f7e6a1a8ee..cd9cd4b53f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -121,7 +121,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
}
for (i = 0; i < 5 && i < nr_vbs; i++) {
- if (draw->pt.vertex_buffer[i].pitch == 0)
+ if (draw->pt.vertex_buffer[i].stride == 0)
fse->key.const_vbuffers |= (1<<i);
}
@@ -189,7 +189,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
i,
((const ubyte *) draw->pt.user.vbuffer[i] +
draw->pt.vertex_buffer[i].buffer_offset),
- draw->pt.vertex_buffer[i].pitch );
+ draw->pt.vertex_buffer[i].stride );
}
*max_vertices = (draw->render->max_vertex_buffer_bytes /
@@ -234,14 +234,17 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)count );
+ if (count >= UNDEFINED_VERTEX_ID)
+ goto fail;
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
/* Single routine to fetch vertices, run shader and emit HW verts.
* Clipping is done elsewhere -- either by the API or on hardware,
@@ -251,13 +254,7 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
start, count,
hw_verts );
- /* Draw arrays path to avoid re-emitting index list again and
- * again.
- */
- draw->render->draw_arrays( draw->render,
- 0,
- count );
-
+
if (0) {
unsigned i;
for (i = 0; i < count; i++) {
@@ -269,12 +266,24 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
(const uint8_t *)hw_verts + fse->key.output_stride * i );
}
}
+
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+ /* Draw arrays path to avoid re-emitting index list again and
+ * again.
+ */
+ draw->render->draw_arrays( draw->render,
+ 0,
+ count );
+
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
+
+ return;
+
+fail:
+ assert(0);
+ return;
}
@@ -293,13 +302,17 @@ fse_run(struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)fetch_count );
- if (!hw_verts) {
- assert(0);
- return;
- }
+ if (fetch_count >= UNDEFINED_VERTEX_ID)
+ goto fail;
+
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)fetch_count ))
+ goto fail;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
+ goto fail;
/* Single routine to fetch vertices, run shader and emit HW verts.
@@ -309,9 +322,6 @@ fse_run(struct draw_pt_middle_end *middle,
fetch_count,
hw_verts );
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
if (0) {
unsigned i;
@@ -323,12 +333,19 @@ fse_run(struct draw_pt_middle_end *middle,
}
}
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) );
+
+ draw->render->draw( draw->render,
+ draw_elts,
+ draw_count );
+
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- fetch_count );
+ draw->render->release_vertices( draw->render );
+ return;
+fail:
+ assert(0);
+ return;
}
@@ -347,13 +364,17 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)count );
+ if (count >= UNDEFINED_VERTEX_ID)
+ return FALSE;
- if (!hw_verts) {
+ if (!draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)count ))
+ return FALSE;
+
+ hw_verts = draw->render->map_vertices( draw->render );
+ if (!hw_verts)
return FALSE;
- }
/* Single routine to fetch vertices, run shader and emit HW verts.
* Clipping is done elsewhere -- either by the API or on hardware,
@@ -369,11 +390,9 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
draw_count );
+ draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- count );
+ draw->render->release_vertices( draw->render );
return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index ec3b41c320..38f9b604d3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -251,9 +251,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
else {
draw_pt_emit_linear( fpme->emit,
(const float (*)[4])pipeline_verts->data,
- count,
fpme->vertex_size,
- 0, /*start*/
count );
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index c15afe65f1..d0e16c9bc3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -67,7 +67,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray,
unsigned segment_count,
boolean end )
{
- assert(segment_count+1 < varray->fetch_max);
+ assert(segment_count < varray->fetch_max);
if (segment_count >= 1) {
unsigned nr = 0, i;
@@ -77,7 +77,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray,
if (end)
varray->fetch_elts[nr++] = start;
- assert(nr < FETCH_MAX);
+ assert(nr <= FETCH_MAX);
varray->middle->run(varray->middle,
varray->fetch_elts,
@@ -94,7 +94,7 @@ static void varray_fan_segment(struct varray_frontend *varray,
unsigned segment_start,
unsigned segment_count )
{
- assert(segment_count+1 < varray->fetch_max);
+ assert(segment_count < varray->fetch_max);
if (segment_count >= 2) {
unsigned nr = 0, i;
@@ -104,7 +104,7 @@ static void varray_fan_segment(struct varray_frontend *varray,
for (i = 0 ; i < segment_count; i++)
varray->fetch_elts[nr++] = start + segment_start + i;
- assert(nr < FETCH_MAX);
+ assert(nr <= FETCH_MAX);
varray->middle->run(varray->middle,
varray->fetch_elts,
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 80d7200ca6..5d268a2226 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -324,7 +324,7 @@ vcache_check_run( struct draw_pt_front_end *frontend,
unsigned fetch_count = max_index + 1 - min_index;
const ushort *transformed_elts;
ushort *storage = NULL;
- boolean ok;
+ boolean ok = FALSE;
if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
@@ -413,11 +413,12 @@ vcache_check_run( struct draw_pt_front_end *frontend,
transformed_elts = storage;
}
- ok = vcache->middle->run_linear_elts( vcache->middle,
- min_index, /* start */
- fetch_count,
- transformed_elts,
- draw_count );
+ if (fetch_count < UNDEFINED_VERTEX_ID)
+ ok = vcache->middle->run_linear_elts( vcache->middle,
+ min_index, /* start */
+ fetch_count,
+ transformed_elts,
+ draw_count );
FREE(storage);
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index 9ac068c47b..cccd3bf435 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -30,14 +30,17 @@
* Vertex buffer drawing stage.
*
* \author Keith Whitwell <keith@tungstengraphics.com>
- * \author José Fonseca <jrfonsec@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonsec@tungstengraphics.com>
*/
#ifndef DRAW_VBUF_H_
#define DRAW_VBUF_H_
+#include "pipe/p_compiler.h"
+
+struct pipe_rasterizer_state;
struct draw_context;
struct vertex_info;
@@ -77,9 +80,14 @@ struct vbuf_render {
* Hardware renderers will use ttm memory, others will just malloc
* something.
*/
- void *(*allocate_vertices)( struct vbuf_render *,
- ushort vertex_size,
- ushort nr_vertices );
+ boolean (*allocate_vertices)( struct vbuf_render *,
+ ushort vertex_size,
+ ushort nr_vertices );
+
+ void *(*map_vertices)( struct vbuf_render * );
+ void (*unmap_vertices)( struct vbuf_render *,
+ ushort min_index,
+ ushort max_index );
/**
* Notify the renderer of the current primitive when it changes.
@@ -106,10 +114,7 @@ struct vbuf_render {
/**
* Called when vbuf is done with this set of vertices:
*/
- void (*release_vertices)( struct vbuf_render *,
- void *vertices,
- unsigned vertex_size,
- unsigned vertices_used );
+ void (*release_vertices)( struct vbuf_render * );
void (*destroy)( struct vbuf_render * );
};
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index a943607d7e..c143cf2372 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -81,9 +81,9 @@ struct vertex_info
* memcmp() comparisons.
*/
struct {
- ubyte interp_mode:4; /**< INTERP_x */
- ubyte emit:4; /**< EMIT_x */
- ubyte src_index; /**< map to post-xform attribs */
+ unsigned interp_mode:4; /**< INTERP_x */
+ unsigned emit:4; /**< EMIT_x */
+ unsigned src_index:8; /**< map to post-xform attribs */
} attrib[PIPE_MAX_SHADER_INPUTS];
};
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 34adbd49b0..c057cd67fd 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -50,7 +50,7 @@ void draw_vs_set_constants( struct draw_context *draw,
const float (*constants)[4],
unsigned size )
{
- if (((unsigned)constants) & 0xf) {
+ if (((uintptr_t)constants) & 0xf) {
if (size > draw->vs.const_storage_size) {
if (draw->vs.aligned_constant_storage)
align_free((void *)draw->vs.aligned_constant_storage);
@@ -85,7 +85,10 @@ draw_create_vertex_shader(struct draw_context *draw,
if (!vs) {
vs = draw_create_vs_sse( draw, shader );
if (!vs) {
- vs = draw_create_vs_exec( draw, shader );
+ vs = draw_create_vs_ppc( draw, shader );
+ if (!vs) {
+ vs = draw_create_vs_exec( draw, shader );
+ }
}
}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 68c24abad3..89ae158751 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -158,6 +158,10 @@ draw_create_vs_sse(struct draw_context *draw,
const struct pipe_shader_state *templ);
struct draw_vertex_shader *
+draw_create_vs_ppc(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 875ecb92db..1fb69ef81a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -32,7 +32,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "pipe/p_shader_tokens.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
@@ -884,7 +884,7 @@ static void set_fpu_round_nearest( struct aos_compilation *cp )
}
}
-
+#if 0
static void x87_emit_ex2( struct aos_compilation *cp )
{
struct x86_reg st0 = x86_make_reg(file_x87, 0);
@@ -907,13 +907,17 @@ static void x87_emit_ex2( struct aos_compilation *cp )
assert( stack == cp->func->x87_stack);
}
+#endif
+#if 0
static void PIPE_CDECL print_reg( const char *msg,
const float *reg )
{
debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]);
}
+#endif
+#if 0
static void emit_print( struct aos_compilation *cp,
const char *message, /* must point to a static string! */
unsigned file,
@@ -965,6 +969,7 @@ static void emit_print( struct aos_compilation *cp,
/* Done...
*/
}
+#endif
/**
* The traditional instructions. All operate on internal registers
@@ -1103,7 +1108,7 @@ static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-
+#if 0
static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
@@ -1111,6 +1116,7 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst
x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
return TRUE;
}
+#endif
static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
@@ -1566,7 +1572,6 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst
*/
static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
-
if (0) {
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg r = aos_get_xmm_reg(cp);
@@ -1575,21 +1580,30 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
else {
- struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
- struct x86_reg r = aos_get_xmm_reg(cp);
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );
struct x86_reg one_point_five = x86_make_disp( neg_half, 4 );
struct x86_reg src = get_xmm_writable( cp, arg0 );
-
- sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */
- sse_mulss( cp->func, src, neg_half ); /* -.5 * a */
- sse_mulss( cp->func, src, r ); /* -.5 * a * r */
- sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */
- sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */
- sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movaps(cp->func, tmp, src);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, tmp, src);
+
+ sse_rsqrtss( cp->func, r, tmp ); /* rsqrtss(a) */
+ sse_mulss( cp->func, tmp, neg_half ); /* -.5 * a */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r * r */
+ sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */
+ sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */
store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+
return TRUE;
}
}
@@ -1877,7 +1891,7 @@ static boolean note_immediate( struct aos_compilation *cp,
unsigned pos = cp->num_immediates++;
unsigned j;
- for (j = 0; j < imm->Immediate.Size; j++) {
+ for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 82d27d4493..b3200df811 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -62,12 +62,15 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
{
struct exec_vertex_shader *evs = exec_vertex_shader(shader);
- /* specify the vertex program to interpret/execute */
- tgsi_exec_machine_bind_shader(evs->machine,
- shader->state.tokens,
- PIPE_MAX_SAMPLERS,
- NULL /*samplers*/ );
-
+ /* Specify the vertex program to interpret/execute.
+ * Avoid rebinding when possible.
+ */
+ if (evs->machine->Tokens != shader->state.tokens) {
+ tgsi_exec_machine_bind_shader(evs->machine,
+ shader->state.tokens,
+ draw->vs.num_samplers,
+ draw->vs.samplers);
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
new file mode 100644
index 0000000000..d35db57d57
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -0,0 +1,244 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_config.h"
+
+#include "draw_vs.h"
+
+#if defined(PIPE_ARCH_PPC)
+
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "rtasm/rtasm_cpu.h"
+#include "rtasm/rtasm_ppc.h"
+#include "tgsi/tgsi_ppc.h"
+#include "tgsi/tgsi_parse.h"
+
+
+
+typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4],
+ float (*outputs)[4][4],
+ float (*temps)[4][4],
+ float (*immeds)[4],
+ float (*consts)[4],
+ const float *builtins);
+
+
+struct draw_ppc_vertex_shader {
+ struct draw_vertex_shader base;
+ struct ppc_function ppc_program;
+
+ codegen_function func;
+};
+
+
+static void
+vs_ppc_prepare( struct draw_vertex_shader *base,
+ struct draw_context *draw )
+{
+ /* nothing */
+}
+
+
+/**
+ * Simplified vertex shader interface for the pt paths. Given the
+ * complexity of code-generating all the above operations together,
+ * it's time to try doing all the other stuff separately.
+ */
+static void
+vs_ppc_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base;
+ unsigned int i;
+
+#define MAX_VERTICES 4
+
+ /* loop over verts */
+ for (i = 0; i < count; i += MAX_VERTICES) {
+ const uint max_vertices = MIN2(MAX_VERTICES, count - i);
+ float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB;
+ float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB;
+ float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB;
+ uint attr;
+
+ /* convert (up to) four input verts to SoA format */
+ for (attr = 0; attr < base->info.num_inputs; attr++) {
+ const float *vIn = (const float *) input;
+ uint vert;
+ for (vert = 0; vert < max_vertices; vert++) {
+#if 0
+ if (attr==0)
+ printf("Input v%d a%d: %f %f %f %f\n",
+ vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]);
+#endif
+ inputs_soa[attr][0][vert] = vIn[attr * 4 + 0];
+ inputs_soa[attr][1][vert] = vIn[attr * 4 + 1];
+ inputs_soa[attr][2][vert] = vIn[attr * 4 + 2];
+ inputs_soa[attr][3][vert] = vIn[attr * 4 + 3];
+ vIn += input_stride / 4;
+ }
+ }
+
+ /* run compiled shader
+ */
+ shader->func(inputs_soa, outputs_soa, temps_soa,
+ (float (*)[4]) shader->base.immediates,
+ (float (*)[4]) constants,
+ ppc_builtin_constants);
+
+ /* convert (up to) four output verts from SoA back to AoS format */
+ for (attr = 0; attr < base->info.num_outputs; attr++) {
+ float *vOut = (float *) output;
+ uint vert;
+ for (vert = 0; vert < max_vertices; vert++) {
+ vOut[attr * 4 + 0] = outputs_soa[attr][0][vert];
+ vOut[attr * 4 + 1] = outputs_soa[attr][1][vert];
+ vOut[attr * 4 + 2] = outputs_soa[attr][2][vert];
+ vOut[attr * 4 + 3] = outputs_soa[attr][3][vert];
+#if 0
+ if (attr==0)
+ printf("Output v%d a%d: %f %f %f %f\n",
+ vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]);
+#endif
+ vOut += output_stride / 4;
+ }
+ }
+
+ /* advance to next group of four input/output verts */
+ input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
+ output = (float (*)[4])((char *)output + output_stride * max_vertices);
+ }
+}
+
+
+static void
+vs_ppc_delete( struct draw_vertex_shader *base )
+{
+ struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base;
+
+ ppc_release_func( &shader->ppc_program );
+
+ align_free( (void *) shader->base.immediates );
+
+ FREE( (void*) shader->base.state.tokens );
+ FREE( shader );
+}
+
+
+struct draw_vertex_shader *
+draw_create_vs_ppc(struct draw_context *draw,
+ const struct pipe_shader_state *templ)
+{
+ struct draw_ppc_vertex_shader *vs;
+
+ vs = CALLOC_STRUCT( draw_ppc_vertex_shader );
+ if (vs == NULL)
+ return NULL;
+
+ /* we make a private copy of the tokens */
+ vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+ if (!vs->base.state.tokens)
+ goto fail;
+
+ tgsi_scan_shader(templ->tokens, &vs->base.info);
+
+ vs->base.draw = draw;
+#if 0
+ if (1)
+ vs->base.create_varient = draw_vs_varient_aos_ppc;
+ else
+#endif
+ vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.prepare = vs_ppc_prepare;
+ vs->base.run_linear = vs_ppc_run_linear;
+ vs->base.delete = vs_ppc_delete;
+
+ vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
+ sizeof(float), 16);
+
+ ppc_init_func( &vs->ppc_program );
+
+#if 0
+ ppc_print_code(&vs->ppc_program, TRUE);
+ ppc_indent(&vs->ppc_program, 8);
+#endif
+
+ if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens,
+ &vs->ppc_program,
+ (float (*)[4]) vs->base.immediates,
+ TRUE ))
+ goto fail;
+
+ vs->func = (codegen_function) ppc_get_func( &vs->ppc_program );
+ if (!vs->func) {
+ goto fail;
+ }
+
+ return &vs->base;
+
+fail:
+ /*
+ debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n");
+ */
+
+ ppc_release_func( &vs->ppc_program );
+
+ FREE(vs);
+ return NULL;
+}
+
+
+
+#else /* PIPE_ARCH_PPC */
+
+
+struct draw_vertex_shader *
+draw_create_vs_ppc( struct draw_context *draw,
+ const struct pipe_shader_state *templ )
+{
+ return (void *) 0;
+}
+
+
+#endif /* PIPE_ARCH_PPC */
diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile
index c3f7bfba93..5a96d94ec3 100644
--- a/src/gallium/auxiliary/gallivm/Makefile
+++ b/src/gallium/auxiliary/gallivm/Makefile
@@ -66,12 +66,12 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES)
gallivm_builtins.cpp: llvm_builtins.c
clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin
- (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@
+ (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
rm temp1.bin
gallivmsoabuiltins.cpp: soabuiltins.c
clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin
- (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@
+ (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
rm temp2.bin
# Emacs tags
diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp
index 29adeea47d..f4af5cc8ad 100644
--- a/src/gallium/auxiliary/gallivm/gallivm.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm.cpp
@@ -53,7 +53,7 @@
#include <llvm/ModuleProvider.h>
#include <llvm/Pass.h>
#include <llvm/PassManager.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/ExecutionEngine/Interpreter.h>
diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
index fcc5c05794..634bac0150 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
@@ -137,4 +137,4 @@ static const unsigned char llvm_builtins_data[] = {
0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c,
0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84,
0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84,
-0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
index 3a4a41e544..1bd00a0c2a 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
@@ -56,7 +56,7 @@
#include <llvm/ModuleProvider.h>
#include <llvm/Pass.h>
#include <llvm/PassManager.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/ExecutionEngine/Interpreter.h>
@@ -158,8 +158,8 @@ void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog
llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
llvm::ExecutionEngine *ee = cpu->engine;
assert(ee);
- /*FIXME : remove */
- ee->DisableLazyCompilation();
+ /*FIXME : why was this disabled ? we need it for pow/sqrt/... */
+ ee->DisableLazyCompilation(false);
ee->addModuleProvider(mp);
llvm::Function *func = func_for_shader(prog);
@@ -179,8 +179,7 @@ struct gallivm_cpu_engine * gallivm_global_cpu_engine()
typedef void (*vertex_shader_runner)(void *ainputs,
void *dests,
- float (*aconsts)[4],
- void *temps);
+ float (*aconsts)[4]);
#define MAX_TGSI_VERTICES 4
/*!
@@ -202,7 +201,6 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
unsigned int i, j;
unsigned slot;
vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
-
assert(runner);
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
@@ -224,8 +222,7 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
/* run shader */
runner(machine->Inputs,
machine->Outputs,
- (float (*)[4]) constants,
- machine->Temps);
+ (float (*)[4]) constants);
/* Unswizzle all output results
*/
diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h
index ebf3e11cd5..d2c5852bdf 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_p.h
+++ b/src/gallium/auxiliary/gallivm/gallivm_p.h
@@ -101,10 +101,10 @@ static INLINE int gallivm_w_swizzle(int swizzle)
return w;
}
-#endif /* MESA_LLVM */
-
#if defined __cplusplus
}
#endif
+#endif /* MESA_LLVM */
+
#endif
diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp
index 599975d5ad..ee8162efce 100644
--- a/src/gallium/auxiliary/gallivm/instructions.cpp
+++ b/src/gallium/auxiliary/gallivm/instructions.cpp
@@ -43,7 +43,7 @@
#include <llvm/Function.h>
#include <llvm/InstrTypes.h>
#include <llvm/Instructions.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Bitcode/ReaderWriter.h>
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
index a658072551..925e948763 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
@@ -37,7 +37,7 @@
#include <llvm/Function.h>
#include <llvm/Instructions.h>
#include <llvm/Transforms/Utils/Cloning.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Bitcode/ReaderWriter.h>
@@ -90,68 +90,11 @@ llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y,
return res;
}
-std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in)
-{
- std::vector<llvm::Value*> res(4);
-
- //Extract x's
- llvm::Value *x1 = m_builder.CreateExtractElement(in[0],
- m_storage->constantInt(0),
- name("extractX"));
- //cast it to an unsigned int
- x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast"));
-
- res[0] = x1;//vectorFromVals(x1, x2, x3, x4);
- //only x is valid. the others shouldn't be necessary
- /*
- res[1] = Constant::getNullValue(m_floatVecType);
- res[2] = Constant::getNullValue(m_floatVecType);
- res[3] = Constant::getNullValue(m_floatVecType);
- */
-
- return res;
-}
-
-
-std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx"));
- res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy"));
- res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz"));
- res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw"));
-
- return res;
-}
-
-std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx"));
- res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly"));
- res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz"));
- res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw"));
-
- return res;
-}
-
void InstructionsSoa::end()
{
m_builder.CreateRetVoid();
}
-std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2,
- const std::vector<llvm::Value*> in3)
-{
- std::vector<llvm::Value*> res = mul(in1, in2);
- return add(res, in3);
-}
-
std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
{
std::vector<llvm::Value*> res(4);
@@ -171,6 +114,11 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
return res;
}
+llvm::IRBuilder<>* InstructionsSoa::getIRBuilder()
+{
+ return &m_builder;
+}
+
void InstructionsSoa::createFunctionMap()
{
m_functionsMap[TGSI_OPCODE_ABS] = "abs";
@@ -258,11 +206,12 @@ llvm::Module * InstructionsSoa::currentModule() const
void InstructionsSoa::createBuiltins()
{
+ std::string ErrMsg;
MemoryBuffer *buffer = MemoryBuffer::getMemBuffer(
(const char*)&soabuiltins_data[0],
- (const char*)&soabuiltins_data[Elements(soabuiltins_data)]);
- m_builtins = ParseBitcodeFile(buffer);
- std::cout<<"Builtins created at "<<m_builtins<<std::endl;
+ (const char*)&soabuiltins_data[Elements(soabuiltins_data) - 1]);
+ m_builtins = ParseBitcodeFile(buffer, &ErrMsg);
+ std::cout<<"Builtins created at "<<m_builtins<<" ("<<ErrMsg<<")"<<std::endl;
assert(m_builtins);
createDependencies();
}
@@ -274,6 +223,41 @@ std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> i
return callBuiltin(func, in1);
}
+std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx"));
+ res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy"));
+ res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz"));
+ res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw"));
+
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in)
+{
+ std::vector<llvm::Value*> res(4);
+
+ //Extract x's
+ llvm::Value *x1 = m_builder.CreateExtractElement(in[0],
+ m_storage->constantInt(0),
+ name("extractX"));
+ //cast it to an unsigned int
+ x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast"));
+
+ res[0] = x1;//vectorFromVals(x1, x2, x3, x4);
+ //only x is valid. the others shouldn't be necessary
+ /*
+ res[1] = Constant::getNullValue(m_floatVecType);
+ res[2] = Constant::getNullValue(m_floatVecType);
+ res[3] = Constant::getNullValue(m_floatVecType);
+ */
+
+ return res;
+}
+
std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2)
{
@@ -281,6 +265,59 @@ std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> i
return callBuiltin(func, in1, in2);
}
+std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in)
+{
+ llvm::Function *func = function(TGSI_OPCODE_LIT);
+ return callBuiltin(func, in);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2,
+ const std::vector<llvm::Value*> in3)
+{
+ std::vector<llvm::Value*> res = mul(in1, in2);
+ return add(res, in3);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_MAX);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_MIN);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx"));
+ res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly"));
+ res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz"));
+ res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw"));
+
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_POWER);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in)
+{
+ llvm::Function *func = function(TGSI_OPCODE_RSQ);
+ return callBuiltin(func, in);
+}
std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2)
@@ -289,6 +326,37 @@ std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> i
return callBuiltin(func, in1, in2);
}
+std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx"));
+ res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby"));
+ res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz"));
+ res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw"));
+
+ return res;
+}
+
+void checkFunction(Function *func)
+{
+ for (Function::const_iterator BI = func->begin(), BE = func->end();
+ BI != BE; ++BI) {
+ const BasicBlock &BB = *BI;
+ for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end();
+ II != IE; ++II) {
+ const Instruction &I = *II;
+ std::cout<< "Instr = "<<I;
+ for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) {
+ const Value *Op = I.getOperand(op);
+ std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl;
+ //I->setOperand(op, V);
+ }
+ }
+ }
+}
+
llvm::Value * InstructionsSoa::allocaTemp()
{
VectorType *vector = VectorType::get(Type::FloatTy, 4);
@@ -408,46 +476,6 @@ std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std
return allocaToResult(allocaPtr);
}
-std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- llvm::Function *func = function(TGSI_OPCODE_POWER);
- return callBuiltin(func, in1, in2);
-}
-
-std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- llvm::Function *func = function(TGSI_OPCODE_MIN);
- return callBuiltin(func, in1, in2);
-}
-
-
-std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- llvm::Function *func = function(TGSI_OPCODE_MAX);
- return callBuiltin(func, in1, in2);
-}
-
-void checkFunction(Function *func)
-{
- for (Function::const_iterator BI = func->begin(), BE = func->end();
- BI != BE; ++BI) {
- const BasicBlock &BB = *BI;
- for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end();
- II != IE; ++II) {
- const Instruction &I = *II;
- std::cout<< "Instr = "<<I;
- for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) {
- const Value *Op = I.getOperand(op);
- std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl;
- //I->setOperand(op, V);
- }
- }
- }
-}
-
void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op)
{
assert(originalFunc);
@@ -492,28 +520,4 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op)
}
}
-std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx"));
- res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby"));
- res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz"));
- res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw"));
-
- return res;
-}
-
-std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in)
-{
- llvm::Function *func = function(TGSI_OPCODE_LIT);
- return callBuiltin(func, in);
-}
-
-std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in)
-{
- llvm::Function *func = function(TGSI_OPCODE_RSQ);
- return callBuiltin(func, in);
-}
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h
index 3817fdc904..d6831e0a6b 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.h
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.h
@@ -76,6 +76,7 @@ public:
void end();
std::vector<llvm::Value*> extractVector(llvm::Value *vector);
+ llvm::IRBuilder<>* getIRBuilder();
private:
const char * name(const char *prefix) const;
llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp
index 6f373f6dd5..73df24c976 100644
--- a/src/gallium/auxiliary/gallivm/storage.cpp
+++ b/src/gallium/auxiliary/gallivm/storage.cpp
@@ -323,7 +323,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
if (indIdx) {
getElem = GetElementPtrInst::Create(ptr,
- BinaryOperator::create(Instruction::Add,
+ BinaryOperator::Create(Instruction::Add,
indIdx,
constantInt(idx),
name("add"),
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp
index 78d754371f..4984ce985c 100644
--- a/src/gallium/auxiliary/gallivm/storagesoa.cpp
+++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp
@@ -30,7 +30,7 @@
#include "gallivm_p.h"
#include "pipe/p_shader_tokens.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include <llvm/BasicBlock.h>
#include <llvm/Module.h>
@@ -48,13 +48,11 @@ using namespace llvm;
StorageSoa::StorageSoa(llvm::BasicBlock *block,
llvm::Value *input,
llvm::Value *output,
- llvm::Value *consts,
- llvm::Value *temps)
+ llvm::Value *consts)
: m_block(block),
m_input(input),
m_output(output),
m_consts(consts),
- m_temps(temps),
m_immediates(0),
m_idx(0)
{
@@ -93,7 +91,7 @@ void StorageSoa::declareImmediates()
std::vector<float> vals(4);
std::vector<Constant*> channelArray;
- vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0];
+ vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3];
llvm::Constant *xChannel = createConstGlobalVector(vals);
vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1];
@@ -144,22 +142,43 @@ std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx)
return res;
}
-std::vector<llvm::Value*> StorageSoa::constElement(llvm::Value *idx)
+llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc)
{
- std::vector<llvm::Value*> res(4);
- llvm::Value *xChannel, *yChannel, *zChannel, *wChannel;
+ std::vector<llvm::Value*> x(4);
+ x[0] = m_builder->CreateExtractElement(vector,
+ constantInt(cc),
+ name("x"));
+
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ Constant *constVector = Constant::getNullValue(vectorType);
+ Value *res = m_builder->CreateInsertElement(constVector, x[0],
+ constantInt(0),
+ name("vecx"));
+ res = m_builder->CreateInsertElement(res, x[0], constantInt(1),
+ name("vecxx"));
+ res = m_builder->CreateInsertElement(res, x[0], constantInt(2),
+ name("vecxxx"));
+ res = m_builder->CreateInsertElement(res, x[0], constantInt(3),
+ name("vecxxxx"));
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx)
+{
+ llvm::Value* res;
+ std::vector<llvm::Value*> res2(4);
+ llvm::Value *xChannel;
xChannel = elementPointer(m_consts, idx, 0);
- yChannel = elementPointer(m_consts, idx, 1);
- zChannel = elementPointer(m_consts, idx, 2);
- wChannel = elementPointer(m_consts, idx, 3);
- res[0] = alignedArrayLoad(xChannel);
- res[1] = alignedArrayLoad(yChannel);
- res[2] = alignedArrayLoad(zChannel);
- res[3] = alignedArrayLoad(wChannel);
+ res = alignedArrayLoad(xChannel);
- return res;
+ res2[0]=unpackConstElement(m_builder, res,0);
+ res2[1]=unpackConstElement(m_builder, res,1);
+ res2[2]=unpackConstElement(m_builder, res,2);
+ res2[3]=unpackConstElement(m_builder, res,3);
+
+ return res2;
}
std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx)
@@ -174,14 +193,15 @@ std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx)
return res;
}
-std::vector<llvm::Value*> StorageSoa::tempElement(llvm::Value *idx)
+std::vector<llvm::Value*> StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx)
{
std::vector<llvm::Value*> res(4);
+ llvm::Value *temp = m_temps[idx];
- res[0] = element(m_temps, idx, 0);
- res[1] = element(m_temps, idx, 1);
- res[2] = element(m_temps, idx, 2);
- res[3] = element(m_temps, idx, 3);
+ res[0] = element(temp, constantInt(0), 0);
+ res[1] = element(temp, constantInt(0), 1);
+ res[2] = element(temp, constantInt(0), 2);
+ res[3] = element(temp, constantInt(0), 3);
return res;
}
@@ -260,6 +280,12 @@ llvm::Module * StorageSoa::currentModule() const
return m_block->getParent()->getParent();
}
+llvm::Constant * StorageSoa::createConstGlobalFloat(const float val)
+{
+ Constant*c = ConstantFP::get(APFloat(val));
+ return c;
+}
+
llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec)
{
VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
@@ -278,7 +304,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v
}
std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle,
- llvm::Value *indIdx)
+ llvm::IRBuilder<>* m_builder,llvm::Value *indIdx)
{
std::vector<llvm::Value*> val(4);
@@ -299,10 +325,10 @@ std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, in
val = outputElement(realIndex);
break;
case TGSI_FILE_TEMPORARY:
- val = tempElement(realIndex);
+ val = tempElement(m_builder, idx);
break;
case TGSI_FILE_CONSTANT:
- val = constElement(realIndex);
+ val = constElement(m_builder, realIndex);
break;
case TGSI_FILE_IMMEDIATE:
val = immediateElement(realIndex);
@@ -328,19 +354,39 @@ std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, in
return res;
}
+llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder)
+{
+ VectorType *vector = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vecArray = ArrayType::get(vector, 4);
+ AllocaInst *alloca = new AllocaInst(vecArray, "temp",
+ m_builder->GetInsertBlock());
+
+ return alloca;
+}
+
+
void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
- int mask)
+ int mask, llvm::IRBuilder<>* m_builder)
{
llvm::Value *out = 0;
+ llvm::Value *realIndex = 0;
switch(type) {
case TGSI_FILE_OUTPUT:
out = m_output;
+ realIndex = constantInt(idx);
break;
case TGSI_FILE_TEMPORARY:
- out = m_temps;
+ // if that temp doesn't already exist, alloca it
+ if (m_temps.find(idx) == m_temps.end())
+ m_temps[idx] = allocaTemp(m_builder);
+
+ out = m_temps[idx];
+
+ realIndex = constantInt(0);
break;
case TGSI_FILE_INPUT:
out = m_input;
+ realIndex = constantInt(idx);
break;
case TGSI_FILE_ADDRESS: {
llvm::Value *addr = m_addresses[idx];
@@ -358,7 +404,6 @@ void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm
assert(0);
break;
}
- llvm::Value *realIndex = constantInt(idx);
if ((mask & TGSI_WRITEMASK_X)) {
llvm::Value *xChannel = elementPointer(out, realIndex, 0);
new StoreInst(val[0], xChannel, false, m_block);
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h
index ae2fc7c6ae..56886f85e7 100644
--- a/src/gallium/auxiliary/gallivm/storagesoa.h
+++ b/src/gallium/auxiliary/gallivm/storagesoa.h
@@ -29,6 +29,7 @@
#define STORAGESOA_H
#include <pipe/p_shader_tokens.h>
+#include <llvm/Support/IRBuilder.h>
#include <vector>
#include <list>
@@ -51,14 +52,13 @@ public:
StorageSoa(llvm::BasicBlock *block,
llvm::Value *input,
llvm::Value *output,
- llvm::Value *consts,
- llvm::Value *temps);
+ llvm::Value *consts);
std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle,
- llvm::Value *indIdx =0);
+ llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0);
void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
- int mask);
+ int mask, llvm::IRBuilder<>* m_builder);
void addImmediate(float *vec);
void declareImmediates();
@@ -76,12 +76,14 @@ private:
const char *name(const char *prefix) const;
llvm::Value *alignedArrayLoad(llvm::Value *val);
llvm::Module *currentModule() const;
+ llvm::Constant *createConstGlobalFloat(const float val);
llvm::Constant *createConstGlobalVector(const std::vector<float> &vec);
std::vector<llvm::Value*> inputElement(llvm::Value *indIdx);
- std::vector<llvm::Value*> constElement(llvm::Value *indIdx);
+ llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc);
+ std::vector<llvm::Value*> constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx);
std::vector<llvm::Value*> outputElement(llvm::Value *indIdx);
- std::vector<llvm::Value*> tempElement(llvm::Value *indIdx);
+ std::vector<llvm::Value*> tempElement(llvm::IRBuilder<>* m_builder, int idx);
std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx);
private:
llvm::BasicBlock *m_block;
@@ -89,12 +91,13 @@ private:
llvm::Value *m_input;
llvm::Value *m_output;
llvm::Value *m_consts;
- llvm::Value *m_temps;
+ std::map<int, llvm::Value*> m_temps;
llvm::GlobalVariable *m_immediates;
std::map<int, llvm::Value*> m_addresses;
std::vector<std::vector<float> > m_immediatesToFlush;
+ llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder);
mutable std::map<int, llvm::ConstantInt*> m_constInts;
mutable char m_name[32];
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 7292c0e366..5b08200d14 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -25,7 +25,7 @@
#include <llvm/ModuleProvider.h>
#include <llvm/Pass.h>
#include <llvm/PassManager.h>
-#include <llvm/ParameterAttributes.h>
+#include <llvm/Attributes.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/ExecutionEngine/Interpreter.h>
@@ -52,8 +52,7 @@ static inline FunctionType *vertexShaderFunctionType()
// pass are castable to the following:
// [4 x <4 x float>] inputs,
// [4 x <4 x float>] output,
- // [4 x [4 x float]] consts,
- // [4 x <4 x float>] temps
+ // [4 x [1 x float]] consts,
std::vector<const Type*> funcArgs;
VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
@@ -61,13 +60,12 @@ static inline FunctionType *vertexShaderFunctionType()
PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0);
ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4);
- ArrayType *constsArray = ArrayType::get(floatArray, 4);
+ ArrayType *constsArray = ArrayType::get(floatArray, 1);
PointerType *constsArrayPtr = PointerType::get(constsArray, 0);
funcArgs.push_back(vectorArrayPtr);//inputs
funcArgs.push_back(vectorArrayPtr);//output
funcArgs.push_back(constsArrayPtr);//consts
- funcArgs.push_back(vectorArrayPtr);//temps
FunctionType *functionType = FunctionType::get(
/*Result=*/Type::VoidTy,
@@ -162,7 +160,7 @@ translate_immediate(Storage *storage,
{
float vec[4];
int i;
- for (i = 0; i < imm->Immediate.Size - 1; ++i) {
+ for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
vec[i] = imm->u.ImmediateFloat32[i].Float;
@@ -181,7 +179,7 @@ translate_immediateir(StorageSoa *storage,
{
float vec[4];
int i;
- for (i = 0; i < imm->Immediate.Size - 1; ++i) {
+ for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
vec[i] = imm->u.ImmediateFloat32[i].Float;
@@ -707,9 +705,8 @@ translate_instructionir(llvm::Module *module,
if (src->SrcRegister.Indirect) {
indIdx = storage->addrElement(src->SrcRegisterInd.Index);
}
-
val = storage->load((enum tgsi_file_type)src->SrcRegister.File,
- src->SrcRegister.Index, swizzle, indIdx);
+ src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx);
inputs[i] = val;
}
@@ -1025,9 +1022,9 @@ translate_instructionir(llvm::Module *module,
/* store results */
for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-
storage->store((enum tgsi_file_type)dst->DstRegister.File,
- dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ dst->DstRegister.Index, out, dst->DstRegister.WriteMask,
+ instr->getIRBuilder() );
}
}
@@ -1122,8 +1119,6 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
output->setName("outputs");
Value *consts = args++;
consts->setName("consts");
- Value *temps = args++;
- temps->setName("temps");
BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0);
@@ -1132,7 +1127,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
fi = tgsi_default_full_instruction();
fd = tgsi_default_full_declaration();
- StorageSoa storage(label_entry, input, output, consts, temps);
+ StorageSoa storage(label_entry, input, output, consts);
InstructionsSoa instr(mod, shader, label_entry, &storage);
while(!tgsi_parse_end_of_tokens(&parse)) {
diff --git a/src/gallium/auxiliary/indices/Makefile b/src/gallium/auxiliary/indices/Makefile
new file mode 100644
index 0000000000..8fa61d265e
--- /dev/null
+++ b/src/gallium/auxiliary/indices/Makefile
@@ -0,0 +1,16 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = indices
+
+C_SOURCES = \
+ u_indices_gen.c
+
+include ../../Makefile.template
+
+u_indices_gen.c: u_indices_gen.py
+ python $< > $@
+
+
+symlinks:
+
diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript
new file mode 100644
index 0000000000..65a43a9f64
--- /dev/null
+++ b/src/gallium/auxiliary/indices/SConscript
@@ -0,0 +1,17 @@
+Import('*')
+
+env.CodeGenerate(
+ target = 'u_indices_gen.c',
+ script = 'u_indices_gen.py',
+ source = [],
+ command = 'python $SCRIPT > $TARGET'
+)
+
+indices = env.ConvenienceLibrary(
+ target = 'indices',
+ source = [
+# 'u_indices.c',
+ 'u_indices_gen.c',
+ ])
+
+auxiliaries.insert(0, indices)
diff --git a/src/gallium/auxiliary/indices/u_indices.c b/src/gallium/auxiliary/indices/u_indices.c
new file mode 100644
index 0000000000..0cf7d88653
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "u_indices.h"
+#include "u_indices_priv.h"
+
+static void translate_memcpy_ushort( const void *in,
+ unsigned nr,
+ void *out )
+{
+ memcpy(out, in, nr*sizeof(short));
+}
+
+static void translate_memcpy_uint( const void *in,
+ unsigned nr,
+ void *out )
+{
+ memcpy(out, in, nr*sizeof(int));
+}
+
+
+int u_index_translator( unsigned hw_mask,
+ unsigned prim,
+ unsigned in_index_size,
+ unsigned nr,
+ unsigned in_pv,
+ unsigned out_pv,
+ unsigned *out_prim,
+ unsigned *out_index_size,
+ unsigned *out_nr,
+ u_translate_func *out_translate )
+{
+ unsigned in_idx;
+ unsigned out_idx;
+ int ret = U_TRANSLATE_NORMAL;
+
+ u_index_init();
+
+ in_idx = in_size_idx(in_index_size);
+ *out_index_size = (in_index_size == 4) ? 4 : 2;
+ out_idx = out_size_idx(*out_index_size);
+
+ if ((hw_mask & (1<<prim)) &&
+ in_index_size == *out_index_size &&
+ in_pv == out_pv)
+ {
+ if (in_index_size == 4)
+ *out_translate = translate_memcpy_uint;
+ else
+ *out_translate = translate_memcpy_ushort;
+
+ *out_prim = prim;
+ *out_nr = nr;
+
+ return U_TRANSLATE_MEMCPY;
+ }
+ else {
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_POINTS;
+ *out_nr = nr;
+ break;
+
+ case PIPE_PRIM_LINES:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_LINES;
+ *out_nr = nr;
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_LINES;
+ *out_nr = (nr - 1) * 2;
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_LINES;
+ *out_nr = nr * 2;
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = nr;
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr - 2) * 3;
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr - 2) * 3;
+ break;
+
+ case PIPE_PRIM_QUADS:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr / 4) * 6;
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr - 2) * 3;
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr - 2) * 3;
+ break;
+
+ default:
+ assert(0);
+ *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_POINTS;
+ *out_nr = nr;
+ return U_TRANSLATE_ERROR;
+ }
+ }
+
+ return ret;
+}
+
+
+
+
+
+int u_index_generator( unsigned hw_mask,
+ unsigned prim,
+ unsigned start,
+ unsigned nr,
+ unsigned in_pv,
+ unsigned out_pv,
+ unsigned *out_prim,
+ unsigned *out_index_size,
+ unsigned *out_nr,
+ u_generate_func *out_generate )
+
+{
+ unsigned out_idx;
+
+ u_index_init();
+
+ *out_index_size = ((start + nr) > 0xfffe) ? 4 : 2;
+ out_idx = out_size_idx(*out_index_size);
+
+ if ((hw_mask & (1<<prim)) &&
+ (in_pv == out_pv)) {
+
+ *out_generate = generate[out_idx][in_pv][out_pv][PIPE_PRIM_POINTS];
+ *out_prim = prim;
+ *out_nr = nr;
+ return U_GENERATE_LINEAR;
+ }
+ else {
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_POINTS;
+ *out_nr = nr;
+ return U_GENERATE_REUSABLE;
+
+ case PIPE_PRIM_LINES:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_LINES;
+ *out_nr = nr;
+ return U_GENERATE_REUSABLE;
+
+ case PIPE_PRIM_LINE_STRIP:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_LINES;
+ *out_nr = (nr - 1) * 2;
+ return U_GENERATE_REUSABLE;
+
+ case PIPE_PRIM_LINE_LOOP:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_LINES;
+ *out_nr = nr * 2;
+ return U_GENERATE_ONE_OFF;
+
+ case PIPE_PRIM_TRIANGLES:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = nr;
+ return U_GENERATE_REUSABLE;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr - 2) * 3;
+ return U_GENERATE_REUSABLE;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr - 2) * 3;
+ return U_GENERATE_REUSABLE;
+
+ case PIPE_PRIM_QUADS:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr / 4) * 6;
+ return U_GENERATE_REUSABLE;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr - 2) * 3;
+ return U_GENERATE_REUSABLE;
+
+ case PIPE_PRIM_POLYGON:
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_TRIANGLES;
+ *out_nr = (nr - 2) * 3;
+ return U_GENERATE_REUSABLE;
+
+ default:
+ assert(0);
+ *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_prim = PIPE_PRIM_POINTS;
+ *out_nr = nr;
+ return U_TRANSLATE_ERROR;
+ }
+ }
+}
diff --git a/src/gallium/auxiliary/indices/u_indices.h b/src/gallium/auxiliary/indices/u_indices.h
new file mode 100644
index 0000000000..abf5a3037d
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef U_INDICES_H
+#define U_INDICES_H
+
+#include "pipe/p_compiler.h"
+
+#define PV_FIRST 0
+#define PV_LAST 1
+#define PV_COUNT 2
+
+typedef void (*u_translate_func)( const void *in,
+ unsigned nr,
+ void *out );
+
+typedef void (*u_generate_func)( unsigned nr,
+ void *out );
+
+
+/* Return codes describe the translate/generate operation. Caller may
+ * be able to reuse translated indices under some circumstances.
+ */
+#define U_TRANSLATE_ERROR -1
+#define U_TRANSLATE_NORMAL 1
+#define U_TRANSLATE_MEMCPY 2
+#define U_GENERATE_LINEAR 3
+#define U_GENERATE_REUSABLE 4
+#define U_GENERATE_ONE_OFF 5
+
+
+void u_index_init( void );
+
+int u_index_translator( unsigned hw_mask,
+ unsigned prim,
+ unsigned in_index_size,
+ unsigned nr,
+ unsigned in_pv, /* API */
+ unsigned out_pv, /* hardware */
+ unsigned *out_prim,
+ unsigned *out_index_size,
+ unsigned *out_nr,
+ u_translate_func *out_translate );
+
+/* Note that even when generating it is necessary to know what the
+ * API's PV is, as the indices generated will depend on whether it is
+ * the same as hardware or not, and in the case of triangle strips,
+ * whether it is first or last.
+ */
+int u_index_generator( unsigned hw_mask,
+ unsigned prim,
+ unsigned start,
+ unsigned nr,
+ unsigned in_pv, /* API */
+ unsigned out_pv, /* hardware */
+ unsigned *out_prim,
+ unsigned *out_index_size,
+ unsigned *out_nr,
+ u_generate_func *out_generate );
+
+
+#endif
diff --git a/src/gallium/auxiliary/indices/u_indices_gen.c b/src/gallium/auxiliary/indices/u_indices_gen.c
new file mode 100644
index 0000000000..3c981e5d7f
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices_gen.c
@@ -0,0 +1,5129 @@
+/* File automatically generated by indices.py */
+
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+/**
+ * @file
+ * Functions to translate and generate index lists
+ */
+
+#include "indices/u_indices.h"
+#include "indices/u_indices_priv.h"
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+
+static unsigned out_size_idx( unsigned index_size )
+{
+ switch (index_size) {
+ case 4: return OUT_UINT;
+ case 2: return OUT_USHORT;
+ default: assert(0); return OUT_USHORT;
+ }
+}
+
+static unsigned in_size_idx( unsigned index_size )
+{
+ switch (index_size) {
+ case 4: return IN_UINT;
+ case 2: return IN_USHORT;
+ case 1: return IN_UBYTE;
+ default: assert(0); return IN_UBYTE;
+ }
+}
+
+
+static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
+static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
+
+
+
+static void generate_points_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)(i);
+ }
+}
+static void generate_lines_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)(i);
+ (out+i)[1] = (ushort)(i+1);
+ }
+}
+static void generate_linestrip_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)(i);
+ (out+j)[1] = (ushort)(i+1);
+ }
+}
+static void generate_lineloop_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)(i);
+ (out+j)[1] = (ushort)(i+1);
+ }
+ (out+j)[0] = (ushort)(i);
+ (out+j)[1] = (ushort)(0);
+}
+static void generate_tris_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)(i);
+ (out+i)[1] = (ushort)(i+1);
+ (out+i)[2] = (ushort)(i+2);
+ }
+}
+static void generate_tristrip_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(i);
+ (out+j)[1] = (ushort)(i+1+(i&1));
+ (out+j)[2] = (ushort)(i+2-(i&1));
+ }
+}
+static void generate_trifan_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(0);
+ (out+j)[1] = (ushort)(i+1);
+ (out+j)[2] = (ushort)(i+2);
+ }
+}
+static void generate_quads_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)(i+0);
+ (out+j+0)[1] = (ushort)(i+1);
+ (out+j+0)[2] = (ushort)(i+3);
+ (out+j+3)[0] = (ushort)(i+1);
+ (out+j+3)[1] = (ushort)(i+2);
+ (out+j+3)[2] = (ushort)(i+3);
+ }
+}
+static void generate_quadstrip_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)(i+2);
+ (out+j+0)[1] = (ushort)(i+0);
+ (out+j+0)[2] = (ushort)(i+3);
+ (out+j+3)[0] = (ushort)(i+0);
+ (out+j+3)[1] = (ushort)(i+1);
+ (out+j+3)[2] = (ushort)(i+3);
+ }
+}
+static void generate_polygon_ushort_first2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(0);
+ (out+j)[1] = (ushort)(i+1);
+ (out+j)[2] = (ushort)(i+2);
+ }
+}
+static void generate_points_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)(i);
+ }
+}
+static void generate_lines_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)(i+1);
+ (out+i)[1] = (ushort)(i);
+ }
+}
+static void generate_linestrip_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)(i+1);
+ (out+j)[1] = (ushort)(i);
+ }
+}
+static void generate_lineloop_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)(i+1);
+ (out+j)[1] = (ushort)(i);
+ }
+ (out+j)[0] = (ushort)(0);
+ (out+j)[1] = (ushort)(i);
+}
+static void generate_tris_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)(i+1);
+ (out+i)[1] = (ushort)(i+2);
+ (out+i)[2] = (ushort)(i);
+ }
+}
+static void generate_tristrip_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(i+1+(i&1));
+ (out+j)[1] = (ushort)(i+2-(i&1));
+ (out+j)[2] = (ushort)(i);
+ }
+}
+static void generate_trifan_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(i+1);
+ (out+j)[1] = (ushort)(i+2);
+ (out+j)[2] = (ushort)(0);
+ }
+}
+static void generate_quads_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)(i+1);
+ (out+j+0)[1] = (ushort)(i+3);
+ (out+j+0)[2] = (ushort)(i+0);
+ (out+j+3)[0] = (ushort)(i+2);
+ (out+j+3)[1] = (ushort)(i+3);
+ (out+j+3)[2] = (ushort)(i+1);
+ }
+}
+static void generate_quadstrip_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)(i+0);
+ (out+j+0)[1] = (ushort)(i+3);
+ (out+j+0)[2] = (ushort)(i+2);
+ (out+j+3)[0] = (ushort)(i+1);
+ (out+j+3)[1] = (ushort)(i+3);
+ (out+j+3)[2] = (ushort)(i+0);
+ }
+}
+static void generate_polygon_ushort_first2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(i+1);
+ (out+j)[1] = (ushort)(i+2);
+ (out+j)[2] = (ushort)(0);
+ }
+}
+static void generate_points_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)(i);
+ }
+}
+static void generate_lines_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)(i+1);
+ (out+i)[1] = (ushort)(i);
+ }
+}
+static void generate_linestrip_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)(i+1);
+ (out+j)[1] = (ushort)(i);
+ }
+}
+static void generate_lineloop_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)(i+1);
+ (out+j)[1] = (ushort)(i);
+ }
+ (out+j)[0] = (ushort)(0);
+ (out+j)[1] = (ushort)(i);
+}
+static void generate_tris_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)(i+2);
+ (out+i)[1] = (ushort)(i);
+ (out+i)[2] = (ushort)(i+1);
+ }
+}
+static void generate_tristrip_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(i+2);
+ (out+j)[1] = (ushort)(i+(i&1));
+ (out+j)[2] = (ushort)(i+1-(i&1));
+ }
+}
+static void generate_trifan_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(i+2);
+ (out+j)[1] = (ushort)(0);
+ (out+j)[2] = (ushort)(i+1);
+ }
+}
+static void generate_quads_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)(i+3);
+ (out+j+0)[1] = (ushort)(i+0);
+ (out+j+0)[2] = (ushort)(i+1);
+ (out+j+3)[0] = (ushort)(i+3);
+ (out+j+3)[1] = (ushort)(i+1);
+ (out+j+3)[2] = (ushort)(i+2);
+ }
+}
+static void generate_quadstrip_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)(i+3);
+ (out+j+0)[1] = (ushort)(i+2);
+ (out+j+0)[2] = (ushort)(i+0);
+ (out+j+3)[0] = (ushort)(i+3);
+ (out+j+3)[1] = (ushort)(i+0);
+ (out+j+3)[2] = (ushort)(i+1);
+ }
+}
+static void generate_polygon_ushort_last2first(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(0);
+ (out+j)[1] = (ushort)(i+1);
+ (out+j)[2] = (ushort)(i+2);
+ }
+}
+static void generate_points_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)(i);
+ }
+}
+static void generate_lines_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)(i);
+ (out+i)[1] = (ushort)(i+1);
+ }
+}
+static void generate_linestrip_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)(i);
+ (out+j)[1] = (ushort)(i+1);
+ }
+}
+static void generate_lineloop_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)(i);
+ (out+j)[1] = (ushort)(i+1);
+ }
+ (out+j)[0] = (ushort)(i);
+ (out+j)[1] = (ushort)(0);
+}
+static void generate_tris_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)(i);
+ (out+i)[1] = (ushort)(i+1);
+ (out+i)[2] = (ushort)(i+2);
+ }
+}
+static void generate_tristrip_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(i+(i&1));
+ (out+j)[1] = (ushort)(i+1-(i&1));
+ (out+j)[2] = (ushort)(i+2);
+ }
+}
+static void generate_trifan_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(0);
+ (out+j)[1] = (ushort)(i+1);
+ (out+j)[2] = (ushort)(i+2);
+ }
+}
+static void generate_quads_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)(i+0);
+ (out+j+0)[1] = (ushort)(i+1);
+ (out+j+0)[2] = (ushort)(i+3);
+ (out+j+3)[0] = (ushort)(i+1);
+ (out+j+3)[1] = (ushort)(i+2);
+ (out+j+3)[2] = (ushort)(i+3);
+ }
+}
+static void generate_quadstrip_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)(i+2);
+ (out+j+0)[1] = (ushort)(i+0);
+ (out+j+0)[2] = (ushort)(i+3);
+ (out+j+3)[0] = (ushort)(i+0);
+ (out+j+3)[1] = (ushort)(i+1);
+ (out+j+3)[2] = (ushort)(i+3);
+ }
+}
+static void generate_polygon_ushort_last2last(
+ unsigned nr,
+ void *_out )
+{
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)(i+1);
+ (out+j)[1] = (ushort)(i+2);
+ (out+j)[2] = (ushort)(0);
+ }
+}
+static void generate_points_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)(i);
+ }
+}
+static void generate_lines_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)(i);
+ (out+i)[1] = (uint)(i+1);
+ }
+}
+static void generate_linestrip_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)(i);
+ (out+j)[1] = (uint)(i+1);
+ }
+}
+static void generate_lineloop_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)(i);
+ (out+j)[1] = (uint)(i+1);
+ }
+ (out+j)[0] = (uint)(i);
+ (out+j)[1] = (uint)(0);
+}
+static void generate_tris_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)(i);
+ (out+i)[1] = (uint)(i+1);
+ (out+i)[2] = (uint)(i+2);
+ }
+}
+static void generate_tristrip_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(i);
+ (out+j)[1] = (uint)(i+1+(i&1));
+ (out+j)[2] = (uint)(i+2-(i&1));
+ }
+}
+static void generate_trifan_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(0);
+ (out+j)[1] = (uint)(i+1);
+ (out+j)[2] = (uint)(i+2);
+ }
+}
+static void generate_quads_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)(i+0);
+ (out+j+0)[1] = (uint)(i+1);
+ (out+j+0)[2] = (uint)(i+3);
+ (out+j+3)[0] = (uint)(i+1);
+ (out+j+3)[1] = (uint)(i+2);
+ (out+j+3)[2] = (uint)(i+3);
+ }
+}
+static void generate_quadstrip_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)(i+2);
+ (out+j+0)[1] = (uint)(i+0);
+ (out+j+0)[2] = (uint)(i+3);
+ (out+j+3)[0] = (uint)(i+0);
+ (out+j+3)[1] = (uint)(i+1);
+ (out+j+3)[2] = (uint)(i+3);
+ }
+}
+static void generate_polygon_uint_first2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(0);
+ (out+j)[1] = (uint)(i+1);
+ (out+j)[2] = (uint)(i+2);
+ }
+}
+static void generate_points_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)(i);
+ }
+}
+static void generate_lines_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)(i+1);
+ (out+i)[1] = (uint)(i);
+ }
+}
+static void generate_linestrip_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)(i+1);
+ (out+j)[1] = (uint)(i);
+ }
+}
+static void generate_lineloop_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)(i+1);
+ (out+j)[1] = (uint)(i);
+ }
+ (out+j)[0] = (uint)(0);
+ (out+j)[1] = (uint)(i);
+}
+static void generate_tris_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)(i+1);
+ (out+i)[1] = (uint)(i+2);
+ (out+i)[2] = (uint)(i);
+ }
+}
+static void generate_tristrip_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(i+1+(i&1));
+ (out+j)[1] = (uint)(i+2-(i&1));
+ (out+j)[2] = (uint)(i);
+ }
+}
+static void generate_trifan_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(i+1);
+ (out+j)[1] = (uint)(i+2);
+ (out+j)[2] = (uint)(0);
+ }
+}
+static void generate_quads_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)(i+1);
+ (out+j+0)[1] = (uint)(i+3);
+ (out+j+0)[2] = (uint)(i+0);
+ (out+j+3)[0] = (uint)(i+2);
+ (out+j+3)[1] = (uint)(i+3);
+ (out+j+3)[2] = (uint)(i+1);
+ }
+}
+static void generate_quadstrip_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)(i+0);
+ (out+j+0)[1] = (uint)(i+3);
+ (out+j+0)[2] = (uint)(i+2);
+ (out+j+3)[0] = (uint)(i+1);
+ (out+j+3)[1] = (uint)(i+3);
+ (out+j+3)[2] = (uint)(i+0);
+ }
+}
+static void generate_polygon_uint_first2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(i+1);
+ (out+j)[1] = (uint)(i+2);
+ (out+j)[2] = (uint)(0);
+ }
+}
+static void generate_points_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)(i);
+ }
+}
+static void generate_lines_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)(i+1);
+ (out+i)[1] = (uint)(i);
+ }
+}
+static void generate_linestrip_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)(i+1);
+ (out+j)[1] = (uint)(i);
+ }
+}
+static void generate_lineloop_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)(i+1);
+ (out+j)[1] = (uint)(i);
+ }
+ (out+j)[0] = (uint)(0);
+ (out+j)[1] = (uint)(i);
+}
+static void generate_tris_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)(i+2);
+ (out+i)[1] = (uint)(i);
+ (out+i)[2] = (uint)(i+1);
+ }
+}
+static void generate_tristrip_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(i+2);
+ (out+j)[1] = (uint)(i+(i&1));
+ (out+j)[2] = (uint)(i+1-(i&1));
+ }
+}
+static void generate_trifan_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(i+2);
+ (out+j)[1] = (uint)(0);
+ (out+j)[2] = (uint)(i+1);
+ }
+}
+static void generate_quads_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)(i+3);
+ (out+j+0)[1] = (uint)(i+0);
+ (out+j+0)[2] = (uint)(i+1);
+ (out+j+3)[0] = (uint)(i+3);
+ (out+j+3)[1] = (uint)(i+1);
+ (out+j+3)[2] = (uint)(i+2);
+ }
+}
+static void generate_quadstrip_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)(i+3);
+ (out+j+0)[1] = (uint)(i+2);
+ (out+j+0)[2] = (uint)(i+0);
+ (out+j+3)[0] = (uint)(i+3);
+ (out+j+3)[1] = (uint)(i+0);
+ (out+j+3)[2] = (uint)(i+1);
+ }
+}
+static void generate_polygon_uint_last2first(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(0);
+ (out+j)[1] = (uint)(i+1);
+ (out+j)[2] = (uint)(i+2);
+ }
+}
+static void generate_points_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)(i);
+ }
+}
+static void generate_lines_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)(i);
+ (out+i)[1] = (uint)(i+1);
+ }
+}
+static void generate_linestrip_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)(i);
+ (out+j)[1] = (uint)(i+1);
+ }
+}
+static void generate_lineloop_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)(i);
+ (out+j)[1] = (uint)(i+1);
+ }
+ (out+j)[0] = (uint)(i);
+ (out+j)[1] = (uint)(0);
+}
+static void generate_tris_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)(i);
+ (out+i)[1] = (uint)(i+1);
+ (out+i)[2] = (uint)(i+2);
+ }
+}
+static void generate_tristrip_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(i+(i&1));
+ (out+j)[1] = (uint)(i+1-(i&1));
+ (out+j)[2] = (uint)(i+2);
+ }
+}
+static void generate_trifan_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(0);
+ (out+j)[1] = (uint)(i+1);
+ (out+j)[2] = (uint)(i+2);
+ }
+}
+static void generate_quads_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)(i+0);
+ (out+j+0)[1] = (uint)(i+1);
+ (out+j+0)[2] = (uint)(i+3);
+ (out+j+3)[0] = (uint)(i+1);
+ (out+j+3)[1] = (uint)(i+2);
+ (out+j+3)[2] = (uint)(i+3);
+ }
+}
+static void generate_quadstrip_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)(i+2);
+ (out+j+0)[1] = (uint)(i+0);
+ (out+j+0)[2] = (uint)(i+3);
+ (out+j+3)[0] = (uint)(i+0);
+ (out+j+3)[1] = (uint)(i+1);
+ (out+j+3)[2] = (uint)(i+3);
+ }
+}
+static void generate_polygon_uint_last2last(
+ unsigned nr,
+ void *_out )
+{
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)(i+1);
+ (out+j)[1] = (uint)(i+2);
+ (out+j)[2] = (uint)(0);
+ }
+}
+static void translate_points_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_linestrip_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_lineloop_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[0];
+}
+static void translate_tris_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ (out+i)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_tristrip_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1+(i&1)];
+ (out+j)[2] = (ushort)in[i+2-(i&1)];
+ }
+}
+static void translate_trifan_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_quads_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+0];
+ (out+j+0)[1] = (ushort)in[i+1];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+1];
+ (out+j+3)[1] = (ushort)in[i+2];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_quadstrip_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+2];
+ (out+j+0)[1] = (ushort)in[i+0];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+0];
+ (out+j+3)[1] = (ushort)in[i+1];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_polygon_ubyte2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_points_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i+1];
+ (out+i)[1] = (ushort)in[i];
+ }
+}
+static void translate_linestrip_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+}
+static void translate_lineloop_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i];
+}
+static void translate_tris_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i+1];
+ (out+i)[1] = (ushort)in[i+2];
+ (out+i)[2] = (ushort)in[i];
+ }
+}
+static void translate_tristrip_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1+(i&1)];
+ (out+j)[1] = (ushort)in[i+2-(i&1)];
+ (out+j)[2] = (ushort)in[i];
+ }
+}
+static void translate_trifan_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i+2];
+ (out+j)[2] = (ushort)in[0];
+ }
+}
+static void translate_quads_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+1];
+ (out+j+0)[1] = (ushort)in[i+3];
+ (out+j+0)[2] = (ushort)in[i+0];
+ (out+j+3)[0] = (ushort)in[i+2];
+ (out+j+3)[1] = (ushort)in[i+3];
+ (out+j+3)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_quadstrip_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+0];
+ (out+j+0)[1] = (ushort)in[i+3];
+ (out+j+0)[2] = (ushort)in[i+2];
+ (out+j+3)[0] = (ushort)in[i+1];
+ (out+j+3)[1] = (ushort)in[i+3];
+ (out+j+3)[2] = (ushort)in[i+0];
+ }
+}
+static void translate_polygon_ubyte2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i+2];
+ (out+j)[2] = (ushort)in[0];
+ }
+}
+static void translate_points_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i+1];
+ (out+i)[1] = (ushort)in[i];
+ }
+}
+static void translate_linestrip_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+}
+static void translate_lineloop_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i];
+}
+static void translate_tris_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i+2];
+ (out+i)[1] = (ushort)in[i];
+ (out+i)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_tristrip_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+2];
+ (out+j)[1] = (ushort)in[i+(i&1)];
+ (out+j)[2] = (ushort)in[i+1-(i&1)];
+ }
+}
+static void translate_trifan_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+2];
+ (out+j)[1] = (ushort)in[0];
+ (out+j)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_quads_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+3];
+ (out+j+0)[1] = (ushort)in[i+0];
+ (out+j+0)[2] = (ushort)in[i+1];
+ (out+j+3)[0] = (ushort)in[i+3];
+ (out+j+3)[1] = (ushort)in[i+1];
+ (out+j+3)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_quadstrip_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+3];
+ (out+j+0)[1] = (ushort)in[i+2];
+ (out+j+0)[2] = (ushort)in[i+0];
+ (out+j+3)[0] = (ushort)in[i+3];
+ (out+j+3)[1] = (ushort)in[i+0];
+ (out+j+3)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_polygon_ubyte2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_points_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_linestrip_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_lineloop_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[0];
+}
+static void translate_tris_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ (out+i)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_tristrip_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+(i&1)];
+ (out+j)[1] = (ushort)in[i+1-(i&1)];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_trifan_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_quads_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+0];
+ (out+j+0)[1] = (ushort)in[i+1];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+1];
+ (out+j+3)[1] = (ushort)in[i+2];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_quadstrip_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+2];
+ (out+j+0)[1] = (ushort)in[i+0];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+0];
+ (out+j+3)[1] = (ushort)in[i+1];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_polygon_ubyte2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i+2];
+ (out+j)[2] = (ushort)in[0];
+ }
+}
+static void translate_points_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ }
+}
+static void translate_linestrip_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+}
+static void translate_lineloop_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[0];
+}
+static void translate_tris_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ (out+i)[2] = (uint)in[i+2];
+ }
+}
+static void translate_tristrip_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1+(i&1)];
+ (out+j)[2] = (uint)in[i+2-(i&1)];
+ }
+}
+static void translate_trifan_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_quads_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+0];
+ (out+j+0)[1] = (uint)in[i+1];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+1];
+ (out+j+3)[1] = (uint)in[i+2];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_quadstrip_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+2];
+ (out+j+0)[1] = (uint)in[i+0];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+0];
+ (out+j+3)[1] = (uint)in[i+1];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_polygon_ubyte2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_points_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i+1];
+ (out+i)[1] = (uint)in[i];
+ }
+}
+static void translate_linestrip_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+}
+static void translate_lineloop_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i];
+}
+static void translate_tris_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i+1];
+ (out+i)[1] = (uint)in[i+2];
+ (out+i)[2] = (uint)in[i];
+ }
+}
+static void translate_tristrip_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1+(i&1)];
+ (out+j)[1] = (uint)in[i+2-(i&1)];
+ (out+j)[2] = (uint)in[i];
+ }
+}
+static void translate_trifan_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i+2];
+ (out+j)[2] = (uint)in[0];
+ }
+}
+static void translate_quads_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+1];
+ (out+j+0)[1] = (uint)in[i+3];
+ (out+j+0)[2] = (uint)in[i+0];
+ (out+j+3)[0] = (uint)in[i+2];
+ (out+j+3)[1] = (uint)in[i+3];
+ (out+j+3)[2] = (uint)in[i+1];
+ }
+}
+static void translate_quadstrip_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+0];
+ (out+j+0)[1] = (uint)in[i+3];
+ (out+j+0)[2] = (uint)in[i+2];
+ (out+j+3)[0] = (uint)in[i+1];
+ (out+j+3)[1] = (uint)in[i+3];
+ (out+j+3)[2] = (uint)in[i+0];
+ }
+}
+static void translate_polygon_ubyte2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i+2];
+ (out+j)[2] = (uint)in[0];
+ }
+}
+static void translate_points_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i+1];
+ (out+i)[1] = (uint)in[i];
+ }
+}
+static void translate_linestrip_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+}
+static void translate_lineloop_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i];
+}
+static void translate_tris_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i+2];
+ (out+i)[1] = (uint)in[i];
+ (out+i)[2] = (uint)in[i+1];
+ }
+}
+static void translate_tristrip_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+2];
+ (out+j)[1] = (uint)in[i+(i&1)];
+ (out+j)[2] = (uint)in[i+1-(i&1)];
+ }
+}
+static void translate_trifan_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+2];
+ (out+j)[1] = (uint)in[0];
+ (out+j)[2] = (uint)in[i+1];
+ }
+}
+static void translate_quads_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+3];
+ (out+j+0)[1] = (uint)in[i+0];
+ (out+j+0)[2] = (uint)in[i+1];
+ (out+j+3)[0] = (uint)in[i+3];
+ (out+j+3)[1] = (uint)in[i+1];
+ (out+j+3)[2] = (uint)in[i+2];
+ }
+}
+static void translate_quadstrip_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+3];
+ (out+j+0)[1] = (uint)in[i+2];
+ (out+j+0)[2] = (uint)in[i+0];
+ (out+j+3)[0] = (uint)in[i+3];
+ (out+j+3)[1] = (uint)in[i+0];
+ (out+j+3)[2] = (uint)in[i+1];
+ }
+}
+static void translate_polygon_ubyte2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_points_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ }
+}
+static void translate_linestrip_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+}
+static void translate_lineloop_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[0];
+}
+static void translate_tris_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ (out+i)[2] = (uint)in[i+2];
+ }
+}
+static void translate_tristrip_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+(i&1)];
+ (out+j)[1] = (uint)in[i+1-(i&1)];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_trifan_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_quads_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+0];
+ (out+j+0)[1] = (uint)in[i+1];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+1];
+ (out+j+3)[1] = (uint)in[i+2];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_quadstrip_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+2];
+ (out+j+0)[1] = (uint)in[i+0];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+0];
+ (out+j+3)[1] = (uint)in[i+1];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_polygon_ubyte2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ubyte*in = (const ubyte*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i+2];
+ (out+j)[2] = (uint)in[0];
+ }
+}
+static void translate_points_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_linestrip_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_lineloop_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[0];
+}
+static void translate_tris_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ (out+i)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_tristrip_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1+(i&1)];
+ (out+j)[2] = (ushort)in[i+2-(i&1)];
+ }
+}
+static void translate_trifan_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_quads_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+0];
+ (out+j+0)[1] = (ushort)in[i+1];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+1];
+ (out+j+3)[1] = (ushort)in[i+2];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_quadstrip_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+2];
+ (out+j+0)[1] = (ushort)in[i+0];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+0];
+ (out+j+3)[1] = (ushort)in[i+1];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_polygon_ushort2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_points_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i+1];
+ (out+i)[1] = (ushort)in[i];
+ }
+}
+static void translate_linestrip_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+}
+static void translate_lineloop_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i];
+}
+static void translate_tris_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i+1];
+ (out+i)[1] = (ushort)in[i+2];
+ (out+i)[2] = (ushort)in[i];
+ }
+}
+static void translate_tristrip_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1+(i&1)];
+ (out+j)[1] = (ushort)in[i+2-(i&1)];
+ (out+j)[2] = (ushort)in[i];
+ }
+}
+static void translate_trifan_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i+2];
+ (out+j)[2] = (ushort)in[0];
+ }
+}
+static void translate_quads_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+1];
+ (out+j+0)[1] = (ushort)in[i+3];
+ (out+j+0)[2] = (ushort)in[i+0];
+ (out+j+3)[0] = (ushort)in[i+2];
+ (out+j+3)[1] = (ushort)in[i+3];
+ (out+j+3)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_quadstrip_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+0];
+ (out+j+0)[1] = (ushort)in[i+3];
+ (out+j+0)[2] = (ushort)in[i+2];
+ (out+j+3)[0] = (ushort)in[i+1];
+ (out+j+3)[1] = (ushort)in[i+3];
+ (out+j+3)[2] = (ushort)in[i+0];
+ }
+}
+static void translate_polygon_ushort2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i+2];
+ (out+j)[2] = (ushort)in[0];
+ }
+}
+static void translate_points_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i+1];
+ (out+i)[1] = (ushort)in[i];
+ }
+}
+static void translate_linestrip_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+}
+static void translate_lineloop_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i];
+}
+static void translate_tris_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i+2];
+ (out+i)[1] = (ushort)in[i];
+ (out+i)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_tristrip_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+2];
+ (out+j)[1] = (ushort)in[i+(i&1)];
+ (out+j)[2] = (ushort)in[i+1-(i&1)];
+ }
+}
+static void translate_trifan_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+2];
+ (out+j)[1] = (ushort)in[0];
+ (out+j)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_quads_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+3];
+ (out+j+0)[1] = (ushort)in[i+0];
+ (out+j+0)[2] = (ushort)in[i+1];
+ (out+j+3)[0] = (ushort)in[i+3];
+ (out+j+3)[1] = (ushort)in[i+1];
+ (out+j+3)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_quadstrip_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+3];
+ (out+j+0)[1] = (ushort)in[i+2];
+ (out+j+0)[2] = (ushort)in[i+0];
+ (out+j+3)[0] = (ushort)in[i+3];
+ (out+j+3)[1] = (ushort)in[i+0];
+ (out+j+3)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_polygon_ushort2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_points_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_linestrip_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_lineloop_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[0];
+}
+static void translate_tris_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ (out+i)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_tristrip_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+(i&1)];
+ (out+j)[1] = (ushort)in[i+1-(i&1)];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_trifan_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_quads_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+0];
+ (out+j+0)[1] = (ushort)in[i+1];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+1];
+ (out+j+3)[1] = (ushort)in[i+2];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_quadstrip_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+2];
+ (out+j+0)[1] = (ushort)in[i+0];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+0];
+ (out+j+3)[1] = (ushort)in[i+1];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_polygon_ushort2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i+2];
+ (out+j)[2] = (ushort)in[0];
+ }
+}
+static void translate_points_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ }
+}
+static void translate_linestrip_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+}
+static void translate_lineloop_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[0];
+}
+static void translate_tris_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ (out+i)[2] = (uint)in[i+2];
+ }
+}
+static void translate_tristrip_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1+(i&1)];
+ (out+j)[2] = (uint)in[i+2-(i&1)];
+ }
+}
+static void translate_trifan_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_quads_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+0];
+ (out+j+0)[1] = (uint)in[i+1];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+1];
+ (out+j+3)[1] = (uint)in[i+2];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_quadstrip_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+2];
+ (out+j+0)[1] = (uint)in[i+0];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+0];
+ (out+j+3)[1] = (uint)in[i+1];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_polygon_ushort2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_points_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i+1];
+ (out+i)[1] = (uint)in[i];
+ }
+}
+static void translate_linestrip_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+}
+static void translate_lineloop_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i];
+}
+static void translate_tris_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i+1];
+ (out+i)[1] = (uint)in[i+2];
+ (out+i)[2] = (uint)in[i];
+ }
+}
+static void translate_tristrip_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1+(i&1)];
+ (out+j)[1] = (uint)in[i+2-(i&1)];
+ (out+j)[2] = (uint)in[i];
+ }
+}
+static void translate_trifan_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i+2];
+ (out+j)[2] = (uint)in[0];
+ }
+}
+static void translate_quads_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+1];
+ (out+j+0)[1] = (uint)in[i+3];
+ (out+j+0)[2] = (uint)in[i+0];
+ (out+j+3)[0] = (uint)in[i+2];
+ (out+j+3)[1] = (uint)in[i+3];
+ (out+j+3)[2] = (uint)in[i+1];
+ }
+}
+static void translate_quadstrip_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+0];
+ (out+j+0)[1] = (uint)in[i+3];
+ (out+j+0)[2] = (uint)in[i+2];
+ (out+j+3)[0] = (uint)in[i+1];
+ (out+j+3)[1] = (uint)in[i+3];
+ (out+j+3)[2] = (uint)in[i+0];
+ }
+}
+static void translate_polygon_ushort2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i+2];
+ (out+j)[2] = (uint)in[0];
+ }
+}
+static void translate_points_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i+1];
+ (out+i)[1] = (uint)in[i];
+ }
+}
+static void translate_linestrip_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+}
+static void translate_lineloop_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i];
+}
+static void translate_tris_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i+2];
+ (out+i)[1] = (uint)in[i];
+ (out+i)[2] = (uint)in[i+1];
+ }
+}
+static void translate_tristrip_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+2];
+ (out+j)[1] = (uint)in[i+(i&1)];
+ (out+j)[2] = (uint)in[i+1-(i&1)];
+ }
+}
+static void translate_trifan_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+2];
+ (out+j)[1] = (uint)in[0];
+ (out+j)[2] = (uint)in[i+1];
+ }
+}
+static void translate_quads_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+3];
+ (out+j+0)[1] = (uint)in[i+0];
+ (out+j+0)[2] = (uint)in[i+1];
+ (out+j+3)[0] = (uint)in[i+3];
+ (out+j+3)[1] = (uint)in[i+1];
+ (out+j+3)[2] = (uint)in[i+2];
+ }
+}
+static void translate_quadstrip_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+3];
+ (out+j+0)[1] = (uint)in[i+2];
+ (out+j+0)[2] = (uint)in[i+0];
+ (out+j+3)[0] = (uint)in[i+3];
+ (out+j+3)[1] = (uint)in[i+0];
+ (out+j+3)[2] = (uint)in[i+1];
+ }
+}
+static void translate_polygon_ushort2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_points_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ }
+}
+static void translate_linestrip_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+}
+static void translate_lineloop_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[0];
+}
+static void translate_tris_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ (out+i)[2] = (uint)in[i+2];
+ }
+}
+static void translate_tristrip_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+(i&1)];
+ (out+j)[1] = (uint)in[i+1-(i&1)];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_trifan_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_quads_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+0];
+ (out+j+0)[1] = (uint)in[i+1];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+1];
+ (out+j+3)[1] = (uint)in[i+2];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_quadstrip_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+2];
+ (out+j+0)[1] = (uint)in[i+0];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+0];
+ (out+j+3)[1] = (uint)in[i+1];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_polygon_ushort2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const ushort*in = (const ushort*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i+2];
+ (out+j)[2] = (uint)in[0];
+ }
+}
+static void translate_points_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_linestrip_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_lineloop_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[0];
+}
+static void translate_tris_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ (out+i)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_tristrip_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1+(i&1)];
+ (out+j)[2] = (ushort)in[i+2-(i&1)];
+ }
+}
+static void translate_trifan_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_quads_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+0];
+ (out+j+0)[1] = (ushort)in[i+1];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+1];
+ (out+j+3)[1] = (ushort)in[i+2];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_quadstrip_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+2];
+ (out+j+0)[1] = (ushort)in[i+0];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+0];
+ (out+j+3)[1] = (ushort)in[i+1];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_polygon_uint2ushort_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_points_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i+1];
+ (out+i)[1] = (ushort)in[i];
+ }
+}
+static void translate_linestrip_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+}
+static void translate_lineloop_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i];
+}
+static void translate_tris_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i+1];
+ (out+i)[1] = (ushort)in[i+2];
+ (out+i)[2] = (ushort)in[i];
+ }
+}
+static void translate_tristrip_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1+(i&1)];
+ (out+j)[1] = (ushort)in[i+2-(i&1)];
+ (out+j)[2] = (ushort)in[i];
+ }
+}
+static void translate_trifan_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i+2];
+ (out+j)[2] = (ushort)in[0];
+ }
+}
+static void translate_quads_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+1];
+ (out+j+0)[1] = (ushort)in[i+3];
+ (out+j+0)[2] = (ushort)in[i+0];
+ (out+j+3)[0] = (ushort)in[i+2];
+ (out+j+3)[1] = (ushort)in[i+3];
+ (out+j+3)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_quadstrip_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+0];
+ (out+j+0)[1] = (ushort)in[i+3];
+ (out+j+0)[2] = (ushort)in[i+2];
+ (out+j+3)[0] = (ushort)in[i+1];
+ (out+j+3)[1] = (ushort)in[i+3];
+ (out+j+3)[2] = (ushort)in[i+0];
+ }
+}
+static void translate_polygon_uint2ushort_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i+2];
+ (out+j)[2] = (ushort)in[0];
+ }
+}
+static void translate_points_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i+1];
+ (out+i)[1] = (ushort)in[i];
+ }
+}
+static void translate_linestrip_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+}
+static void translate_lineloop_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i];
+ }
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i];
+}
+static void translate_tris_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i+2];
+ (out+i)[1] = (ushort)in[i];
+ (out+i)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_tristrip_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+2];
+ (out+j)[1] = (ushort)in[i+(i&1)];
+ (out+j)[2] = (ushort)in[i+1-(i&1)];
+ }
+}
+static void translate_trifan_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+2];
+ (out+j)[1] = (ushort)in[0];
+ (out+j)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_quads_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+3];
+ (out+j+0)[1] = (ushort)in[i+0];
+ (out+j+0)[2] = (ushort)in[i+1];
+ (out+j+3)[0] = (ushort)in[i+3];
+ (out+j+3)[1] = (ushort)in[i+1];
+ (out+j+3)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_quadstrip_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+3];
+ (out+j+0)[1] = (ushort)in[i+2];
+ (out+j+0)[2] = (ushort)in[i+0];
+ (out+j+3)[0] = (ushort)in[i+3];
+ (out+j+3)[1] = (ushort)in[i+0];
+ (out+j+3)[2] = (ushort)in[i+1];
+ }
+}
+static void translate_polygon_uint2ushort_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_points_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (ushort)in[i];
+ }
+}
+static void translate_lines_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_linestrip_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+}
+static void translate_lineloop_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[i+1];
+ }
+ (out+j)[0] = (ushort)in[i];
+ (out+j)[1] = (ushort)in[0];
+}
+static void translate_tris_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (ushort)in[i];
+ (out+i)[1] = (ushort)in[i+1];
+ (out+i)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_tristrip_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+(i&1)];
+ (out+j)[1] = (ushort)in[i+1-(i&1)];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_trifan_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[0];
+ (out+j)[1] = (ushort)in[i+1];
+ (out+j)[2] = (ushort)in[i+2];
+ }
+}
+static void translate_quads_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (ushort)in[i+0];
+ (out+j+0)[1] = (ushort)in[i+1];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+1];
+ (out+j+3)[1] = (ushort)in[i+2];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_quadstrip_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (ushort)in[i+2];
+ (out+j+0)[1] = (ushort)in[i+0];
+ (out+j+0)[2] = (ushort)in[i+3];
+ (out+j+3)[0] = (ushort)in[i+0];
+ (out+j+3)[1] = (ushort)in[i+1];
+ (out+j+3)[2] = (ushort)in[i+3];
+ }
+}
+static void translate_polygon_uint2ushort_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ ushort *out = (ushort*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (ushort)in[i+1];
+ (out+j)[1] = (ushort)in[i+2];
+ (out+j)[2] = (ushort)in[0];
+ }
+}
+static void translate_points_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ }
+}
+static void translate_linestrip_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+}
+static void translate_lineloop_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[0];
+}
+static void translate_tris_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ (out+i)[2] = (uint)in[i+2];
+ }
+}
+static void translate_tristrip_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1+(i&1)];
+ (out+j)[2] = (uint)in[i+2-(i&1)];
+ }
+}
+static void translate_trifan_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_quads_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+0];
+ (out+j+0)[1] = (uint)in[i+1];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+1];
+ (out+j+3)[1] = (uint)in[i+2];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_quadstrip_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+2];
+ (out+j+0)[1] = (uint)in[i+0];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+0];
+ (out+j+3)[1] = (uint)in[i+1];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_polygon_uint2uint_first2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_points_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i+1];
+ (out+i)[1] = (uint)in[i];
+ }
+}
+static void translate_linestrip_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+}
+static void translate_lineloop_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i];
+}
+static void translate_tris_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i+1];
+ (out+i)[1] = (uint)in[i+2];
+ (out+i)[2] = (uint)in[i];
+ }
+}
+static void translate_tristrip_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1+(i&1)];
+ (out+j)[1] = (uint)in[i+2-(i&1)];
+ (out+j)[2] = (uint)in[i];
+ }
+}
+static void translate_trifan_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i+2];
+ (out+j)[2] = (uint)in[0];
+ }
+}
+static void translate_quads_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+1];
+ (out+j+0)[1] = (uint)in[i+3];
+ (out+j+0)[2] = (uint)in[i+0];
+ (out+j+3)[0] = (uint)in[i+2];
+ (out+j+3)[1] = (uint)in[i+3];
+ (out+j+3)[2] = (uint)in[i+1];
+ }
+}
+static void translate_quadstrip_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+0];
+ (out+j+0)[1] = (uint)in[i+3];
+ (out+j+0)[2] = (uint)in[i+2];
+ (out+j+3)[0] = (uint)in[i+1];
+ (out+j+3)[1] = (uint)in[i+3];
+ (out+j+3)[2] = (uint)in[i+0];
+ }
+}
+static void translate_polygon_uint2uint_first2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i+2];
+ (out+j)[2] = (uint)in[0];
+ }
+}
+static void translate_points_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i+1];
+ (out+i)[1] = (uint)in[i];
+ }
+}
+static void translate_linestrip_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+}
+static void translate_lineloop_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i];
+ }
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i];
+}
+static void translate_tris_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i+2];
+ (out+i)[1] = (uint)in[i];
+ (out+i)[2] = (uint)in[i+1];
+ }
+}
+static void translate_tristrip_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+2];
+ (out+j)[1] = (uint)in[i+(i&1)];
+ (out+j)[2] = (uint)in[i+1-(i&1)];
+ }
+}
+static void translate_trifan_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+2];
+ (out+j)[1] = (uint)in[0];
+ (out+j)[2] = (uint)in[i+1];
+ }
+}
+static void translate_quads_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+3];
+ (out+j+0)[1] = (uint)in[i+0];
+ (out+j+0)[2] = (uint)in[i+1];
+ (out+j+3)[0] = (uint)in[i+3];
+ (out+j+3)[1] = (uint)in[i+1];
+ (out+j+3)[2] = (uint)in[i+2];
+ }
+}
+static void translate_quadstrip_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+3];
+ (out+j+0)[1] = (uint)in[i+2];
+ (out+j+0)[2] = (uint)in[i+0];
+ (out+j+3)[0] = (uint)in[i+3];
+ (out+j+3)[1] = (uint)in[i+0];
+ (out+j+3)[2] = (uint)in[i+1];
+ }
+}
+static void translate_polygon_uint2uint_last2first(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_points_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i++) {
+ (out+i)[0] = (uint)in[i];
+ }
+}
+static void translate_lines_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=2) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ }
+}
+static void translate_linestrip_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+}
+static void translate_lineloop_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr - 2; j+=2, i++) {
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[i+1];
+ }
+ (out+j)[0] = (uint)in[i];
+ (out+j)[1] = (uint)in[0];
+}
+static void translate_tris_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (i = 0; i < nr; i+=3) {
+ (out+i)[0] = (uint)in[i];
+ (out+i)[1] = (uint)in[i+1];
+ (out+i)[2] = (uint)in[i+2];
+ }
+}
+static void translate_tristrip_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+(i&1)];
+ (out+j)[1] = (uint)in[i+1-(i&1)];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_trifan_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[0];
+ (out+j)[1] = (uint)in[i+1];
+ (out+j)[2] = (uint)in[i+2];
+ }
+}
+static void translate_quads_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=4) {
+ (out+j+0)[0] = (uint)in[i+0];
+ (out+j+0)[1] = (uint)in[i+1];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+1];
+ (out+j+3)[1] = (uint)in[i+2];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_quadstrip_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=6, i+=2) {
+ (out+j+0)[0] = (uint)in[i+2];
+ (out+j+0)[1] = (uint)in[i+0];
+ (out+j+0)[2] = (uint)in[i+3];
+ (out+j+3)[0] = (uint)in[i+0];
+ (out+j+3)[1] = (uint)in[i+1];
+ (out+j+3)[2] = (uint)in[i+3];
+ }
+}
+static void translate_polygon_uint2uint_last2last(
+ const void * _in,
+ unsigned nr,
+ void *_out )
+{
+ const uint*in = (const uint*)_in;
+ uint *out = (uint*)_out;
+ unsigned i, j;
+ (void)j;
+ for (j = i = 0; j < nr; j+=3, i++) {
+ (out+j)[0] = (uint)in[i+1];
+ (out+j)[1] = (uint)in[i+2];
+ (out+j)[2] = (uint)in[0];
+ }
+}
+void u_index_init( void )
+{
+ static int firsttime = 1;
+ if (!firsttime) return;
+ firsttime = 0;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2first;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2last;
+generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2last;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2first;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2last;
+generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2last;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2first;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2last;
+generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2last;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2first;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2last;
+generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2first;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2first;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2first;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2first;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2last;
+translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2first;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2first;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2first;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2first;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2last;
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2first;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2first;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2last;
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2first;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2first;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2last;
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2last;
+}
+#include "indices/u_indices.c"
diff --git a/src/gallium/auxiliary/indices/u_indices_gen.py b/src/gallium/auxiliary/indices/u_indices_gen.py
new file mode 100644
index 0000000000..af63d09930
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices_gen.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python
+copyright = '''
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+'''
+
+GENERATE, UBYTE, USHORT, UINT = 'generate', 'ubyte', 'ushort', 'uint'
+FIRST, LAST = 'first', 'last'
+
+INTYPES = (GENERATE, UBYTE, USHORT, UINT)
+OUTTYPES = (USHORT, UINT)
+PVS=(FIRST, LAST)
+PRIMS=('points',
+ 'lines',
+ 'linestrip',
+ 'lineloop',
+ 'tris',
+ 'trifan',
+ 'tristrip',
+ 'quads',
+ 'quadstrip',
+ 'polygon')
+
+LONGPRIMS=('PIPE_PRIM_POINTS',
+ 'PIPE_PRIM_LINES',
+ 'PIPE_PRIM_LINE_STRIP',
+ 'PIPE_PRIM_LINE_LOOP',
+ 'PIPE_PRIM_TRIANGLES',
+ 'PIPE_PRIM_TRIANGLE_FAN',
+ 'PIPE_PRIM_TRIANGLE_STRIP',
+ 'PIPE_PRIM_QUADS',
+ 'PIPE_PRIM_QUAD_STRIP',
+ 'PIPE_PRIM_POLYGON')
+
+longprim = dict(zip(PRIMS, LONGPRIMS))
+intype_idx = dict(ubyte='IN_UBYTE', ushort='IN_USHORT', uint='IN_UINT')
+outtype_idx = dict(ushort='OUT_USHORT', uint='OUT_UINT')
+pv_idx = dict(first='PV_FIRST', last='PV_LAST')
+
+
+def prolog():
+ print '''/* File automatically generated by indices.py */'''
+ print copyright
+ print r'''
+
+/**
+ * @file
+ * Functions to translate and generate index lists
+ */
+
+#include "indices/u_indices.h"
+#include "indices/u_indices_priv.h"
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+
+static unsigned out_size_idx( unsigned index_size )
+{
+ switch (index_size) {
+ case 4: return OUT_UINT;
+ case 2: return OUT_USHORT;
+ default: assert(0); return OUT_USHORT;
+ }
+}
+
+static unsigned in_size_idx( unsigned index_size )
+{
+ switch (index_size) {
+ case 4: return IN_UINT;
+ case 2: return IN_USHORT;
+ case 1: return IN_UBYTE;
+ default: assert(0); return IN_UBYTE;
+ }
+}
+
+
+static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
+static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
+
+
+'''
+
+def vert( intype, outtype, v0 ):
+ if intype == GENERATE:
+ return '(' + outtype + ')(' + v0 + ')'
+ else:
+ return '(' + outtype + ')in[' + v0 + ']'
+
+def point( intype, outtype, ptr, v0 ):
+ print ' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';'
+
+def line( intype, outtype, ptr, v0, v1 ):
+ print ' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';'
+ print ' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';'
+
+def tri( intype, outtype, ptr, v0, v1, v2 ):
+ print ' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';'
+ print ' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';'
+ print ' (' + ptr + ')[2] = ' + vert( intype, outtype, v2 ) + ';'
+
+def do_point( intype, outtype, ptr, v0 ):
+ point( intype, outtype, ptr, v0 )
+
+def do_line( intype, outtype, ptr, v0, v1, inpv, outpv ):
+ if inpv == outpv:
+ line( intype, outtype, ptr, v0, v1 )
+ else:
+ line( intype, outtype, ptr, v1, v0 )
+
+def do_tri( intype, outtype, ptr, v0, v1, v2, inpv, outpv ):
+ if inpv == outpv:
+ tri( intype, outtype, ptr, v0, v1, v2 )
+ else:
+ if inpv == FIRST:
+ tri( intype, outtype, ptr, v1, v2, v0 )
+ else:
+ tri( intype, outtype, ptr, v2, v0, v1 )
+
+def do_quad( intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ):
+ do_tri( intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv );
+ do_tri( intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv );
+
+def name(intype, outtype, inpv, outpv, prim):
+ if intype == GENERATE:
+ return 'generate_' + prim + '_' + outtype + '_' + inpv + '2' + outpv
+ else:
+ return 'translate_' + prim + '_' + intype + '2' + outtype + '_' + inpv + '2' + outpv
+
+def preamble(intype, outtype, inpv, outpv, prim):
+ print 'static void ' + name( intype, outtype, inpv, outpv, prim ) + '('
+ if intype != GENERATE:
+ print ' const void * _in,'
+ print ' unsigned nr,'
+ print ' void *_out )'
+ print '{'
+ if intype != GENERATE:
+ print ' const ' + intype + '*in = (const ' + intype + '*)_in;'
+ print ' ' + outtype + ' *out = (' + outtype + '*)_out;'
+ print ' unsigned i, j;'
+ print ' (void)j;'
+
+def postamble():
+ print '}'
+
+
+def points(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='points')
+ print ' for (i = 0; i < nr; i++) { '
+ do_point( intype, outtype, 'out+i', 'i' );
+ print ' }'
+ postamble()
+
+def lines(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='lines')
+ print ' for (i = 0; i < nr; i+=2) { '
+ do_line( intype, outtype, 'out+i', 'i', 'i+1', inpv, outpv );
+ print ' }'
+ postamble()
+
+def linestrip(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='linestrip')
+ print ' for (j = i = 0; j < nr; j+=2, i++) { '
+ do_line( intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv );
+ print ' }'
+ postamble()
+
+def lineloop(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='lineloop')
+ print ' for (j = i = 0; j < nr - 2; j+=2, i++) { '
+ do_line( intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv );
+ print ' }'
+ do_line( intype, outtype, 'out+j', 'i', '0', inpv, outpv );
+ postamble()
+
+def tris(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='tris')
+ print ' for (i = 0; i < nr; i+=3) { '
+ do_tri( intype, outtype, 'out+i', 'i', 'i+1', 'i+2', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+def tristrip(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='tristrip')
+ print ' for (j = i = 0; j < nr; j+=3, i++) { '
+ if inpv == FIRST:
+ do_tri( intype, outtype, 'out+j', 'i', 'i+1+(i&1)', 'i+2-(i&1)', inpv, outpv );
+ else:
+ do_tri( intype, outtype, 'out+j', 'i+(i&1)', 'i+1-(i&1)', 'i+2', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+def trifan(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='trifan')
+ print ' for (j = i = 0; j < nr; j+=3, i++) { '
+ do_tri( intype, outtype, 'out+j', '0', 'i+1', 'i+2', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+
+def polygon(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='polygon')
+ print ' for (j = i = 0; j < nr; j+=3, i++) { '
+ if inpv == FIRST:
+ do_tri( intype, outtype, 'out+j', '0', 'i+1', 'i+2', inpv, outpv );
+ else:
+ do_tri( intype, outtype, 'out+j', 'i+1', 'i+2', '0', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+def quads(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='quads')
+ print ' for (j = i = 0; j < nr; j+=6, i+=4) { '
+ do_quad( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+def quadstrip(intype, outtype, inpv, outpv):
+ preamble(intype, outtype, inpv, outpv, prim='quadstrip')
+ print ' for (j = i = 0; j < nr; j+=6, i+=2) { '
+ do_quad( intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv );
+ print ' }'
+ postamble()
+
+
+def emit_funcs():
+ for intype in INTYPES:
+ for outtype in OUTTYPES:
+ for inpv in (FIRST, LAST):
+ for outpv in (FIRST, LAST):
+ points(intype, outtype, inpv, outpv)
+ lines(intype, outtype, inpv, outpv)
+ linestrip(intype, outtype, inpv, outpv)
+ lineloop(intype, outtype, inpv, outpv)
+ tris(intype, outtype, inpv, outpv)
+ tristrip(intype, outtype, inpv, outpv)
+ trifan(intype, outtype, inpv, outpv)
+ quads(intype, outtype, inpv, outpv)
+ quadstrip(intype, outtype, inpv, outpv)
+ polygon(intype, outtype, inpv, outpv)
+
+def init(intype, outtype, inpv, outpv, prim):
+ if intype == GENERATE:
+ print ('generate[' +
+ outtype_idx[outtype] +
+ '][' + pv_idx[inpv] +
+ '][' + pv_idx[outpv] +
+ '][' + longprim[prim] +
+ '] = ' + name( intype, outtype, inpv, outpv, prim ) + ';')
+ else:
+ print ('translate[' +
+ intype_idx[intype] +
+ '][' + outtype_idx[outtype] +
+ '][' + pv_idx[inpv] +
+ '][' + pv_idx[outpv] +
+ '][' + longprim[prim] +
+ '] = ' + name( intype, outtype, inpv, outpv, prim ) + ';')
+
+
+def emit_all_inits():
+ for intype in INTYPES:
+ for outtype in OUTTYPES:
+ for inpv in PVS:
+ for outpv in PVS:
+ for prim in PRIMS:
+ init(intype, outtype, inpv, outpv, prim)
+
+def emit_init():
+ print 'void u_index_init( void )'
+ print '{'
+ print ' static int firsttime = 1;'
+ print ' if (!firsttime) return;'
+ print ' firsttime = 0;'
+ emit_all_inits()
+ print '}'
+
+
+
+
+def epilog():
+ print '#include "indices/u_indices.c"'
+
+
+def main():
+ prolog()
+ emit_funcs()
+ emit_init()
+ epilog()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/gallium/auxiliary/indices/u_indices_priv.h b/src/gallium/auxiliary/indices/u_indices_priv.h
new file mode 100644
index 0000000000..9c3298c24d
--- /dev/null
+++ b/src/gallium/auxiliary/indices/u_indices_priv.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef U_INDICES_PRIV_H
+#define U_INDICES_PRIV_H
+
+#include "pipe/p_compiler.h"
+#include "u_indices.h"
+
+#define IN_UBYTE 0
+#define IN_USHORT 1
+#define IN_UINT 2
+#define IN_COUNT 3
+
+#define OUT_USHORT 0
+#define OUT_UINT 1
+#define OUT_COUNT 2
+
+
+#define PRIM_COUNT (PIPE_PRIM_POLYGON + 1)
+
+#endif
diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile
index f9b39d9ce0..3b501c51df 100644
--- a/src/gallium/auxiliary/pipebuffer/Makefile
+++ b/src/gallium/auxiliary/pipebuffer/Makefile
@@ -11,10 +11,10 @@ C_SOURCES = \
pb_bufmgr_debug.c \
pb_bufmgr_fenced.c \
pb_bufmgr_mm.c \
+ pb_bufmgr_ondemand.c \
pb_bufmgr_pool.c \
pb_bufmgr_slab.c \
- pb_validate.c \
- pb_winsys.c
+ pb_validate.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript
index 56a40dda0d..8e9f06abe4 100644
--- a/src/gallium/auxiliary/pipebuffer/SConscript
+++ b/src/gallium/auxiliary/pipebuffer/SConscript
@@ -10,10 +10,10 @@ pipebuffer = env.ConvenienceLibrary(
'pb_bufmgr_debug.c',
'pb_bufmgr_fenced.c',
'pb_bufmgr_mm.c',
+ 'pb_bufmgr_ondemand.c',
'pb_bufmgr_pool.c',
'pb_bufmgr_slab.c',
'pb_validate.c',
- 'pb_winsys.c',
])
auxiliaries.insert(0, pipebuffer)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 8505d333bd..e6b0b30ff4 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -37,7 +37,7 @@
* There is no obligation of a winsys driver to use this library. And a pipe
* driver should be completly agnostic about it.
*
- * \author Jos� Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#ifndef PB_BUFFER_H_
@@ -45,7 +45,8 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
+#include "pipe/p_error.h"
#include "pipe/p_state.h"
#include "pipe/p_inlines.h"
@@ -56,6 +57,8 @@ extern "C" {
struct pb_vtbl;
+struct pb_validate;
+
/**
* Buffer description.
@@ -104,6 +107,13 @@ struct pb_vtbl
void (*unmap)( struct pb_buffer *buf );
+ enum pipe_error (*validate)( struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags );
+
+ void (*fence)( struct pb_buffer *buf,
+ struct pipe_fence_handle *fence );
+
/**
* Get the base buffer and the offset.
*
@@ -118,6 +128,7 @@ struct pb_vtbl
void (*get_base_buffer)( struct pb_buffer *buf,
struct pb_buffer **base_buf,
unsigned *offset );
+
};
@@ -148,6 +159,7 @@ pb_map(struct pb_buffer *buf,
assert(buf);
if(!buf)
return NULL;
+ assert(buf->base.refcount > 0);
return buf->vtbl->map(buf, flags);
}
@@ -158,6 +170,7 @@ pb_unmap(struct pb_buffer *buf)
assert(buf);
if(!buf)
return;
+ assert(buf->base.refcount > 0);
buf->vtbl->unmap(buf);
}
@@ -173,7 +186,33 @@ pb_get_base_buffer( struct pb_buffer *buf,
offset = 0;
return;
}
+ assert(buf->base.refcount > 0);
+ assert(buf->vtbl->get_base_buffer);
buf->vtbl->get_base_buffer(buf, base_buf, offset);
+ assert(*base_buf);
+ assert(*offset < (*base_buf)->base.size);
+}
+
+
+static INLINE enum pipe_error
+pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags)
+{
+ assert(buf);
+ if(!buf)
+ return PIPE_ERROR;
+ assert(buf->vtbl->validate);
+ return buf->vtbl->validate(buf, vl, flags);
+}
+
+
+static INLINE void
+pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence)
+{
+ assert(buf);
+ if(!buf)
+ return;
+ assert(buf->vtbl->fence);
+ buf->vtbl->fence(buf, fence);
}
@@ -183,6 +222,7 @@ pb_destroy(struct pb_buffer *buf)
assert(buf);
if(!buf)
return;
+ assert(buf->base.refcount == 0);
buf->vtbl->destroy(buf);
}
@@ -193,11 +233,16 @@ static INLINE void
pb_reference(struct pb_buffer **dst,
struct pb_buffer *src)
{
- if (src)
+ if (src) {
+ assert(src->base.refcount);
src->base.refcount++;
+ }
- if (*dst && --(*dst)->base.refcount == 0)
- pb_destroy( *dst );
+ if (*dst) {
+ assert((*dst)->base.refcount);
+ if(--(*dst)->base.refcount == 0)
+ pb_destroy( *dst );
+ }
*dst = src;
}
@@ -210,7 +255,13 @@ pb_reference(struct pb_buffer **dst,
static INLINE boolean
pb_check_alignment(size_t requested, size_t provided)
{
- return requested <= provided && (provided % requested) == 0 ? TRUE : FALSE;
+ if(!requested)
+ return TRUE;
+ if(requested > provided)
+ return FALSE;
+ if(provided % requested != 0)
+ return FALSE;
+ return TRUE;
}
@@ -234,10 +285,6 @@ pb_malloc_buffer_create(size_t size,
const struct pb_desc *desc);
-void
-pb_init_winsys(struct pipe_winsys *winsys);
-
-
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 17b2781052..0cddc95aa6 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -29,7 +29,7 @@
* \file
* Implementation of fenced buffers.
*
- * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com>
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
*/
@@ -43,8 +43,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_error.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_winsys.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
@@ -59,19 +58,12 @@
*/
#define SUPER(__derived) (&(__derived)->base)
-#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \
- ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE )
-#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \
- ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE )
-#define PIPE_BUFFER_USAGE_WRITE \
- ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE )
-
struct fenced_buffer_list
{
pipe_mutex mutex;
- struct pipe_winsys *winsys;
+ struct pb_fence_ops *ops;
size_t numDelayed;
@@ -97,6 +89,8 @@ struct fenced_buffer
unsigned flags;
unsigned mapcount;
+ struct pb_validate *vl;
+ unsigned validation_flags;
struct pipe_fence_handle *fence;
struct list_head head;
@@ -108,7 +102,6 @@ static INLINE struct fenced_buffer *
fenced_buffer(struct pb_buffer *buf)
{
assert(buf);
- assert(buf->vtbl == &fenced_buffer_vtbl);
return (struct fenced_buffer *)buf;
}
@@ -146,12 +139,12 @@ static INLINE void
_fenced_buffer_remove(struct fenced_buffer_list *fenced_list,
struct fenced_buffer *fenced_buf)
{
- struct pipe_winsys *winsys = fenced_list->winsys;
+ struct pb_fence_ops *ops = fenced_list->ops;
assert(fenced_buf->fence);
assert(fenced_buf->list == fenced_list);
- winsys->fence_reference(winsys, &fenced_buf->fence, NULL);
+ ops->fence_reference(ops, &fenced_buf->fence, NULL);
fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
assert(fenced_buf->head.prev);
@@ -174,7 +167,7 @@ static INLINE enum pipe_error
_fenced_buffer_finish(struct fenced_buffer *fenced_buf)
{
struct fenced_buffer_list *fenced_list = fenced_buf->list;
- struct pipe_winsys *winsys = fenced_list->winsys;
+ struct pb_fence_ops *ops = fenced_list->ops;
#if 0
debug_warning("waiting for GPU");
@@ -182,7 +175,7 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf)
assert(fenced_buf->fence);
if(fenced_buf->fence) {
- if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) {
+ if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) {
return PIPE_ERROR;
}
/* Remove from the fenced list */
@@ -202,7 +195,7 @@ static void
_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
int wait)
{
- struct pipe_winsys *winsys = fenced_list->winsys;
+ struct pb_fence_ops *ops = fenced_list->ops;
struct list_head *curr, *next;
struct fenced_buffer *fenced_buf;
struct pipe_fence_handle *prev_fence = NULL;
@@ -215,15 +208,15 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
if(fenced_buf->fence != prev_fence) {
int signaled;
if (wait)
- signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0);
+ signaled = ops->fence_finish(ops, fenced_buf->fence, 0);
else
- signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0);
+ signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
if (signaled != 0)
break;
prev_fence = fenced_buf->fence;
}
else {
- assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0);
+ assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
}
_fenced_buffer_remove(fenced_list, fenced_buf);
@@ -243,14 +236,14 @@ fenced_buffer_destroy(struct pb_buffer *buf)
pipe_mutex_lock(fenced_list->mutex);
assert(fenced_buf->base.base.refcount == 0);
if (fenced_buf->fence) {
- struct pipe_winsys *winsys = fenced_list->winsys;
- if(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0) {
+ struct pb_fence_ops *ops = fenced_list->ops;
+ if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
struct list_head *curr, *prev;
curr = &fenced_buf->head;
prev = curr->prev;
do {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
- assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0);
+ assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
_fenced_buffer_remove(fenced_list, fenced_buf);
curr = prev;
prev = curr->prev;
@@ -274,6 +267,7 @@ fenced_buffer_map(struct pb_buffer *buf,
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
void *map;
+ assert(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE);
assert(!(flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE));
flags &= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
@@ -315,6 +309,93 @@ fenced_buffer_unmap(struct pb_buffer *buf)
}
+static enum pipe_error
+fenced_buffer_validate(struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ enum pipe_error ret;
+
+ if(!vl) {
+ /* invalidate */
+ fenced_buf->vl = NULL;
+ fenced_buf->validation_flags = 0;
+ return PIPE_OK;
+ }
+
+ assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+ assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE));
+ flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+
+ /* Buffer cannot be validated in two different lists */
+ if(fenced_buf->vl && fenced_buf->vl != vl)
+ return PIPE_ERROR_RETRY;
+
+ /* Do not validate if buffer is still mapped */
+ if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) {
+ /* TODO: wait for the thread that mapped the buffer to unmap it */
+ return PIPE_ERROR_RETRY;
+ }
+
+ if(fenced_buf->vl == vl &&
+ (fenced_buf->validation_flags & flags) == flags) {
+ /* Nothing to do -- buffer already validated */
+ return PIPE_OK;
+ }
+
+ /* Final sanity checking */
+ assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE));
+ assert(!fenced_buf->mapcount);
+
+ ret = pb_validate(fenced_buf->buffer, vl, flags);
+ if (ret != PIPE_OK)
+ return ret;
+
+ fenced_buf->vl = vl;
+ fenced_buf->validation_flags |= flags;
+
+ return PIPE_OK;
+}
+
+
+static void
+fenced_buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+ struct fenced_buffer *fenced_buf;
+ struct fenced_buffer_list *fenced_list;
+ struct pb_fence_ops *ops;
+
+ fenced_buf = fenced_buffer(buf);
+ fenced_list = fenced_buf->list;
+ ops = fenced_list->ops;
+
+ if(fence == fenced_buf->fence) {
+ /* Nothing to do */
+ return;
+ }
+
+ assert(fenced_buf->vl);
+ assert(fenced_buf->validation_flags);
+
+ pipe_mutex_lock(fenced_list->mutex);
+ if (fenced_buf->fence)
+ _fenced_buffer_remove(fenced_list, fenced_buf);
+ if (fence) {
+ ops->fence_reference(ops, &fenced_buf->fence, fence);
+ fenced_buf->flags |= fenced_buf->validation_flags;
+ _fenced_buffer_add(fenced_buf);
+ }
+ pipe_mutex_unlock(fenced_list->mutex);
+
+ pb_fence(fenced_buf->buffer, fence);
+
+ fenced_buf->vl = NULL;
+ fenced_buf->validation_flags = 0;
+}
+
+
static void
fenced_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
@@ -325,11 +406,13 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf,
}
-const struct pb_vtbl
+static const struct pb_vtbl
fenced_buffer_vtbl = {
fenced_buffer_destroy,
fenced_buffer_map,
fenced_buffer_unmap,
+ fenced_buffer_validate,
+ fenced_buffer_fence,
fenced_buffer_get_base_buffer
};
@@ -362,54 +445,8 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list,
}
-void
-buffer_fence(struct pb_buffer *buf,
- struct pipe_fence_handle *fence)
-{
- struct fenced_buffer *fenced_buf;
- struct fenced_buffer_list *fenced_list;
- struct pipe_winsys *winsys;
- /* FIXME: receive this as a parameter */
- unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0;
-
- /* This is a public function, so be extra cautious with the buffer passed,
- * as happens frequently to receive null buffers, or pointer to buffers
- * other than fenced buffers. */
- assert(buf);
- if(!buf)
- return;
- assert(buf->vtbl == &fenced_buffer_vtbl);
- if(buf->vtbl != &fenced_buffer_vtbl)
- return;
-
- fenced_buf = fenced_buffer(buf);
- fenced_list = fenced_buf->list;
- winsys = fenced_list->winsys;
-
- if(!fence || fence == fenced_buf->fence) {
- /* Handle the same fence case specially, not only because it is a fast
- * path, but mostly to avoid serializing two writes with the same fence,
- * as that would bring the hardware down to synchronous operation without
- * any benefit.
- */
- fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE;
- return;
- }
-
- pipe_mutex_lock(fenced_list->mutex);
- if (fenced_buf->fence)
- _fenced_buffer_remove(fenced_list, fenced_buf);
- if (fence) {
- winsys->fence_reference(winsys, &fenced_buf->fence, fence);
- fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE;
- _fenced_buffer_add(fenced_buf);
- }
- pipe_mutex_unlock(fenced_list->mutex);
-}
-
-
struct fenced_buffer_list *
-fenced_buffer_list_create(struct pipe_winsys *winsys)
+fenced_buffer_list_create(struct pb_fence_ops *ops)
{
struct fenced_buffer_list *fenced_list;
@@ -417,7 +454,7 @@ fenced_buffer_list_create(struct pipe_winsys *winsys)
if (!fenced_list)
return NULL;
- fenced_list->winsys = winsys;
+ fenced_list->ops = ops;
LIST_INITHEAD(&fenced_list->delayed);
@@ -456,6 +493,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
pipe_mutex_unlock(fenced_list->mutex);
+ fenced_list->ops->destroy(fenced_list->ops);
+
FREE(fenced_list);
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
index 50d5891bdb..c2b29e974b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
@@ -44,14 +44,14 @@
* Between the handle's destruction, and the fence signalling, the buffer is
* stored in a fenced buffer list.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#ifndef PB_BUFFER_FENCED_H_
#define PB_BUFFER_FENCED_H_
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#ifdef __cplusplus
@@ -59,7 +59,6 @@ extern "C" {
#endif
-struct pipe_winsys;
struct pipe_buffer;
struct pipe_fence_handle;
@@ -70,12 +69,33 @@ struct pipe_fence_handle;
struct fenced_buffer_list;
-/**
- * The fenced buffer's virtual function table.
- *
- * NOTE: Made public for debugging purposes.
- */
-extern const struct pb_vtbl fenced_buffer_vtbl;
+struct pb_fence_ops
+{
+ void (*destroy)( struct pb_fence_ops *ops );
+
+ /** Set ptr = fence, with reference counting */
+ void (*fence_reference)( struct pb_fence_ops *ops,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence );
+
+ /**
+ * Checks whether the fence has been signalled.
+ * \param flags driver-specific meaning
+ * \return zero on success.
+ */
+ int (*fence_signalled)( struct pb_fence_ops *ops,
+ struct pipe_fence_handle *fence,
+ unsigned flag );
+
+ /**
+ * Wait for the fence to finish.
+ * \param flags driver-specific meaning
+ * \return zero on success.
+ */
+ int (*fence_finish)( struct pb_fence_ops *ops,
+ struct pipe_fence_handle *fence,
+ unsigned flag );
+};
/**
@@ -84,7 +104,7 @@ extern const struct pb_vtbl fenced_buffer_vtbl;
* See also fenced_bufmgr_create for a more convenient way to use this.
*/
struct fenced_buffer_list *
-fenced_buffer_list_create(struct pipe_winsys *winsys);
+fenced_buffer_list_create(struct pb_fence_ops *ops);
/**
@@ -108,17 +128,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced,
struct pb_buffer *buffer);
-/**
- * Set a buffer's fence.
- *
- * NOTE: Although it takes a generic pb_buffer argument, it will fail
- * on everything but buffers returned by fenced_buffer_create.
- */
-void
-buffer_fence(struct pb_buffer *buf,
- struct pipe_fence_handle *fence);
-
-
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index 1bf22a2ec0..282802b171 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -34,7 +34,7 @@
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "pb_buffer.h"
#include "pb_bufmgr.h"
@@ -81,6 +81,24 @@ malloc_buffer_unmap(struct pb_buffer *buf)
}
+static enum pipe_error
+malloc_buffer_validate(struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ assert(0);
+ return PIPE_ERROR;
+}
+
+
+static void
+malloc_buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+ assert(0);
+}
+
+
static void
malloc_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
@@ -96,6 +114,8 @@ malloc_buffer_vtbl = {
malloc_buffer_destroy,
malloc_buffer_map,
malloc_buffer_unmap,
+ malloc_buffer_validate,
+ malloc_buffer_fence,
malloc_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index cafbee045a..fec8db91c7 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -43,7 +43,7 @@
* - the fenced buffer manager, which will delay buffer destruction until the
* the moment the card finishing processing it.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#ifndef PB_BUFMGR_H_
@@ -61,7 +61,6 @@ extern "C" {
struct pb_desc;
struct pipe_buffer;
-struct pipe_winsys;
/**
@@ -163,6 +162,8 @@ pb_cache_manager_create(struct pb_manager *provider,
unsigned usecs);
+struct pb_fence_ops;
+
/**
* Fenced buffer manager.
*
@@ -174,7 +175,7 @@ pb_cache_manager_create(struct pb_manager *provider,
*/
struct pb_manager *
fenced_bufmgr_create(struct pb_manager *provider,
- struct pipe_winsys *winsys);
+ struct pb_fence_ops *ops);
struct pb_manager *
@@ -183,6 +184,20 @@ pb_alt_manager_create(struct pb_manager *provider1,
/**
+ * Ondemand buffer manager.
+ *
+ * Buffers are created in malloc'ed memory (fast and cached), and the constents
+ * is transfered to a buffer from the provider (typically in slow uncached
+ * memory) when there is an attempt to validate the buffer.
+ *
+ * Ideal for situations where one does not know before hand whether a given
+ * buffer will effectively be used by the hardware or not.
+ */
+struct pb_manager *
+pb_ondemand_manager_create(struct pb_manager *provider);
+
+
+/**
* Debug buffer manager to detect buffer under- and overflows.
*
* Band size should be a multiple of the largest alignment
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
index c956924cc7..db67d46c56 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
@@ -34,7 +34,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "pb_buffer.h"
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 8f118874ec..29117efe9b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -29,14 +29,13 @@
* \file
* Buffer cache.
*
- * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com>
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
*/
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_winsys.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
@@ -183,6 +182,25 @@ pb_cache_buffer_unmap(struct pb_buffer *_buf)
}
+static enum pipe_error
+pb_cache_buffer_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+ return pb_validate(buf->buffer, vl, flags);
+}
+
+
+static void
+pb_cache_buffer_fence(struct pb_buffer *_buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+ pb_fence(buf->buffer, fence);
+}
+
+
static void
pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
@@ -198,6 +216,8 @@ pb_cache_buffer_vtbl = {
pb_cache_buffer_destroy,
pb_cache_buffer_map,
pb_cache_buffer_unmap,
+ pb_cache_buffer_validate,
+ pb_cache_buffer_fence,
pb_cache_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index 1675e6e182..070bf3f517 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -29,13 +29,12 @@
* \file
* Debug buffer manager to detect buffer under- and overflows.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_winsys.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -255,11 +254,35 @@ pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf,
}
+static enum pipe_error
+pb_debug_buffer_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
+
+ pb_debug_buffer_check(buf);
+
+ return pb_validate(buf->buffer, vl, flags);
+}
+
+
+static void
+pb_debug_buffer_fence(struct pb_buffer *_buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
+ pb_fence(buf->buffer, fence);
+}
+
+
const struct pb_vtbl
pb_debug_buffer_vtbl = {
pb_debug_buffer_destroy,
pb_debug_buffer_map,
pb_debug_buffer_unmap,
+ pb_debug_buffer_validate,
+ pb_debug_buffer_fence,
pb_debug_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
index 633ee70a75..e352f5357b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
@@ -30,11 +30,11 @@
* \file
* A buffer manager that wraps buffers in fenced buffers.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.dot.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.dot.com>
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "pb_buffer.h"
@@ -86,8 +86,7 @@ fenced_bufmgr_create_buffer(struct pb_manager *mgr,
fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf);
if(!fenced_buf) {
- assert(buf->base.refcount == 1);
- pb_destroy(buf);
+ pb_reference(&buf, NULL);
}
return fenced_buf;
@@ -123,7 +122,7 @@ fenced_bufmgr_destroy(struct pb_manager *mgr)
struct pb_manager *
fenced_bufmgr_create(struct pb_manager *provider,
- struct pipe_winsys *winsys)
+ struct pb_fence_ops *ops)
{
struct fenced_pb_manager *fenced_mgr;
@@ -139,7 +138,7 @@ fenced_bufmgr_create(struct pb_manager *provider,
fenced_mgr->base.flush = fenced_bufmgr_flush;
fenced_mgr->provider = provider;
- fenced_mgr->fenced_list = fenced_buffer_list_create(winsys);
+ fenced_mgr->fenced_list = fenced_buffer_list_create(ops);
if(!fenced_mgr->fenced_list) {
FREE(fenced_mgr);
return NULL;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index fe80ca30ee..f3b1ca73b0 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -29,12 +29,12 @@
* \file
* Buffer manager using the old texture memory manager.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
#include "pipe/p_defines.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
@@ -100,7 +100,7 @@ mm_buffer_destroy(struct pb_buffer *buf)
assert(buf->base.refcount == 0);
pipe_mutex_lock(mm->mutex);
- mmFreeMem(mm_buf->block);
+ u_mmFreeMem(mm_buf->block);
FREE(buf);
pipe_mutex_unlock(mm->mutex);
}
@@ -124,6 +124,27 @@ mm_buffer_unmap(struct pb_buffer *buf)
}
+static enum pipe_error
+mm_buffer_validate(struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct mm_buffer *mm_buf = mm_buffer(buf);
+ struct mm_pb_manager *mm = mm_buf->mgr;
+ return pb_validate(mm->buffer, vl, flags);
+}
+
+
+static void
+mm_buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+ struct mm_buffer *mm_buf = mm_buffer(buf);
+ struct mm_pb_manager *mm = mm_buf->mgr;
+ pb_fence(mm->buffer, fence);
+}
+
+
static void
mm_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
@@ -141,6 +162,8 @@ mm_buffer_vtbl = {
mm_buffer_destroy,
mm_buffer_map,
mm_buffer_unmap,
+ mm_buffer_validate,
+ mm_buffer_fence,
mm_buffer_get_base_buffer
};
@@ -154,8 +177,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
struct mm_buffer *mm_buf;
/* We don't handle alignments larger then the one initially setup */
- assert(desc->alignment % (1 << mm->align2) == 0);
- if(desc->alignment % (1 << mm->align2))
+ assert(pb_check_alignment(desc->alignment, 1 << mm->align2));
+ if(!pb_check_alignment(desc->alignment, 1 << mm->align2))
return NULL;
pipe_mutex_lock(mm->mutex);
@@ -175,14 +198,14 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
mm_buf->mgr = mm;
- mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
+ mm_buf->block = u_mmAllocMem(mm->heap, size, mm->align2, 0);
if(!mm_buf->block) {
debug_printf("warning: heap full\n");
#if 0
mmDumpMemInfo(mm->heap);
#endif
- mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
+ mm_buf->block = u_mmAllocMem(mm->heap, size, mm->align2, 0);
if(!mm_buf->block) {
FREE(mm_buf);
pipe_mutex_unlock(mm->mutex);
@@ -213,7 +236,7 @@ mm_bufmgr_destroy(struct pb_manager *mgr)
pipe_mutex_lock(mm->mutex);
- mmDestroy(mm->heap);
+ u_mmDestroy(mm->heap);
pb_unmap(mm->buffer);
pb_reference(&mm->buffer, NULL);
@@ -254,7 +277,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
if(!mm->map)
goto failure;
- mm->heap = mmInit(0, size);
+ mm->heap = u_mmInit(0, size);
if (!mm->heap)
goto failure;
@@ -262,7 +285,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
failure:
if(mm->heap)
- mmDestroy(mm->heap);
+ u_mmDestroy(mm->heap);
if(mm->map)
pb_unmap(mm->buffer);
if(mm)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
new file mode 100644
index 0000000000..3d9c7bba0b
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
@@ -0,0 +1,303 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * A variation of malloc buffers which get transferred to real graphics memory
+ * when there is an attempt to validate them.
+ *
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+struct pb_ondemand_manager;
+
+
+struct pb_ondemand_buffer
+{
+ struct pb_buffer base;
+
+ struct pb_ondemand_manager *mgr;
+
+ /** Regular malloc'ed memory */
+ void *data;
+ unsigned mapcount;
+
+ /** Real buffer */
+ struct pb_buffer *buffer;
+ size_t size;
+ struct pb_desc desc;
+};
+
+
+struct pb_ondemand_manager
+{
+ struct pb_manager base;
+
+ struct pb_manager *provider;
+};
+
+
+extern const struct pb_vtbl pb_ondemand_buffer_vtbl;
+
+static INLINE struct pb_ondemand_buffer *
+pb_ondemand_buffer(struct pb_buffer *buf)
+{
+ assert(buf);
+ assert(buf->vtbl == &pb_ondemand_buffer_vtbl);
+ return (struct pb_ondemand_buffer *)buf;
+}
+
+static INLINE struct pb_ondemand_manager *
+pb_ondemand_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct pb_ondemand_manager *)mgr;
+}
+
+
+static void
+pb_ondemand_buffer_destroy(struct pb_buffer *_buf)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ pb_reference(&buf->buffer, NULL);
+
+ align_free(buf->data);
+
+ FREE(buf);
+}
+
+
+static void *
+pb_ondemand_buffer_map(struct pb_buffer *_buf,
+ unsigned flags)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ if(buf->buffer) {
+ assert(!buf->data);
+ return pb_map(buf->buffer, flags);
+ }
+ else {
+ assert(buf->data);
+ ++buf->mapcount;
+ return buf->data;
+ }
+}
+
+
+static void
+pb_ondemand_buffer_unmap(struct pb_buffer *_buf)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ if(buf->buffer) {
+ assert(!buf->data);
+ pb_unmap(buf->buffer);
+ }
+ else {
+ assert(buf->data);
+ assert(buf->mapcount);
+ if(buf->mapcount)
+ --buf->mapcount;
+ }
+}
+
+
+static enum pipe_error
+pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf)
+{
+ if(!buf->buffer) {
+ struct pb_manager *provider = buf->mgr->provider;
+ uint8_t *map;
+
+ assert(!buf->mapcount);
+
+ buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc);
+ if(!buf->buffer)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ if(!map) {
+ pb_reference(&buf->buffer, NULL);
+ return PIPE_ERROR;
+ }
+
+ memcpy(map, buf->data, buf->size);
+
+ pb_unmap(buf->buffer);
+
+ if(!buf->mapcount) {
+ FREE(buf->data);
+ buf->data = NULL;
+ }
+ }
+
+ return PIPE_OK;
+}
+
+static enum pipe_error
+pb_ondemand_buffer_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+ enum pipe_error ret;
+
+ assert(!buf->mapcount);
+ if(buf->mapcount)
+ return PIPE_ERROR;
+
+ ret = pb_ondemand_buffer_instantiate(buf);
+ if(ret != PIPE_OK)
+ return ret;
+
+ return pb_validate(buf->buffer, vl, flags);
+}
+
+
+static void
+pb_ondemand_buffer_fence(struct pb_buffer *_buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ assert(buf->buffer);
+ if(!buf->buffer)
+ return;
+
+ pb_fence(buf->buffer, fence);
+}
+
+
+static void
+pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+ if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) {
+ assert(0);
+ *base_buf = &buf->base;
+ *offset = 0;
+ return;
+ }
+
+ pb_get_base_buffer(buf->buffer, base_buf, offset);
+}
+
+
+const struct pb_vtbl
+pb_ondemand_buffer_vtbl = {
+ pb_ondemand_buffer_destroy,
+ pb_ondemand_buffer_map,
+ pb_ondemand_buffer_unmap,
+ pb_ondemand_buffer_validate,
+ pb_ondemand_buffer_fence,
+ pb_ondemand_buffer_get_base_buffer
+};
+
+
+static struct pb_buffer *
+pb_ondemand_manager_create_buffer(struct pb_manager *_mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
+ struct pb_ondemand_buffer *buf;
+
+ buf = CALLOC_STRUCT(pb_ondemand_buffer);
+ if(!buf)
+ return NULL;
+
+ buf->base.base.refcount = 1;
+ buf->base.base.alignment = desc->alignment;
+ buf->base.base.usage = desc->usage;
+ buf->base.base.size = size;
+ buf->base.vtbl = &pb_ondemand_buffer_vtbl;
+
+ buf->mgr = mgr;
+
+ buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment);
+ if(!buf->data) {
+ FREE(buf);
+ return NULL;
+ }
+
+ buf->size = size;
+ buf->desc = *desc;
+
+ return &buf->base;
+}
+
+
+static void
+pb_ondemand_manager_flush(struct pb_manager *_mgr)
+{
+ struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
+
+ mgr->provider->flush(mgr->provider);
+}
+
+
+static void
+pb_ondemand_manager_destroy(struct pb_manager *_mgr)
+{
+ struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
+
+ FREE(mgr);
+}
+
+
+struct pb_manager *
+pb_ondemand_manager_create(struct pb_manager *provider)
+{
+ struct pb_ondemand_manager *mgr;
+
+ if(!provider)
+ return NULL;
+
+ mgr = CALLOC_STRUCT(pb_ondemand_manager);
+ if(!mgr)
+ return NULL;
+
+ mgr->base.destroy = pb_ondemand_manager_destroy;
+ mgr->base.create_buffer = pb_ondemand_manager_create_buffer;
+ mgr->base.flush = pb_ondemand_manager_flush;
+
+ mgr->provider = provider;
+
+ return &mgr->base;
+}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
index 61ac291ed7..12447acfd9 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -30,13 +30,13 @@
* \file
* Batch buffer pool management.
*
- * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com>
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
*/
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
@@ -138,6 +138,27 @@ pool_buffer_unmap(struct pb_buffer *buf)
}
+static enum pipe_error
+pool_buffer_validate(struct pb_buffer *buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pool_buffer *pool_buf = pool_buffer(buf);
+ struct pool_pb_manager *pool = pool_buf->mgr;
+ return pb_validate(pool->buffer, vl, flags);
+}
+
+
+static void
+pool_buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pool_buffer *pool_buf = pool_buffer(buf);
+ struct pool_pb_manager *pool = pool_buf->mgr;
+ pb_fence(pool->buffer, fence);
+}
+
+
static void
pool_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
@@ -155,6 +176,8 @@ pool_buffer_vtbl = {
pool_buffer_destroy,
pool_buffer_map,
pool_buffer_unmap,
+ pool_buffer_validate,
+ pool_buffer_fence,
pool_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 2a80154920..a3259351b9 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -38,7 +38,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_error.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
@@ -248,6 +248,25 @@ pb_slab_buffer_unmap(struct pb_buffer *_buf)
}
+static enum pipe_error
+pb_slab_buffer_validate(struct pb_buffer *_buf,
+ struct pb_validate *vl,
+ unsigned flags)
+{
+ struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+ return pb_validate(buf->slab->bo, vl, flags);
+}
+
+
+static void
+pb_slab_buffer_fence(struct pb_buffer *_buf,
+ struct pipe_fence_handle *fence)
+{
+ struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+ pb_fence(buf->slab->bo, fence);
+}
+
+
static void
pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
@@ -264,6 +283,8 @@ pb_slab_buffer_vtbl = {
pb_slab_buffer_destroy,
pb_slab_buffer_map,
pb_slab_buffer_unmap,
+ pb_slab_buffer_validate,
+ pb_slab_buffer_fence,
pb_slab_buffer_get_base_buffer
};
diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c
index 1e54fc39d4..150fd50618 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_validate.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c
@@ -36,7 +36,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_error.h"
#include "util/u_memory.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pb_buffer.h"
#include "pb_buffer_fenced.h"
@@ -46,9 +46,16 @@
#define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */
+struct pb_validate_entry
+{
+ struct pb_buffer *buf;
+ unsigned flags;
+};
+
+
struct pb_validate
{
- struct pb_buffer **buffers;
+ struct pb_validate_entry *entries;
unsigned used;
unsigned size;
};
@@ -56,43 +63,50 @@ struct pb_validate
enum pipe_error
pb_validate_add_buffer(struct pb_validate *vl,
- struct pb_buffer *buf)
+ struct pb_buffer *buf,
+ unsigned flags)
{
assert(buf);
if(!buf)
return PIPE_ERROR;
+ assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+ assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE));
+ flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+
/* We only need to store one reference for each buffer, so avoid storing
- * consecutive references for the same buffer. It might not be the more
- * common pasttern, but it is easy to implement.
+ * consecutive references for the same buffer. It might not be the most
+ * common pattern, but it is easy to implement.
*/
- if(vl->used && vl->buffers[vl->used - 1] == buf) {
+ if(vl->used && vl->entries[vl->used - 1].buf == buf) {
+ vl->entries[vl->used - 1].flags |= flags;
return PIPE_OK;
}
/* Grow the table */
if(vl->used == vl->size) {
unsigned new_size;
- struct pb_buffer **new_buffers;
+ struct pb_validate_entry *new_entries;
new_size = vl->size * 2;
if(!new_size)
return PIPE_ERROR_OUT_OF_MEMORY;
- new_buffers = (struct pb_buffer **)REALLOC(vl->buffers,
- vl->size*sizeof(struct pb_buffer *),
- new_size*sizeof(struct pb_buffer *));
- if(!new_buffers)
+ new_entries = (struct pb_validate_entry *)REALLOC(vl->entries,
+ vl->size*sizeof(struct pb_validate_entry),
+ new_size*sizeof(struct pb_validate_entry));
+ if(!new_entries)
return PIPE_ERROR_OUT_OF_MEMORY;
- memset(new_buffers + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_buffer *));
+ memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry));
vl->size = new_size;
- vl->buffers = new_buffers;
+ vl->entries = new_entries;
}
- assert(!vl->buffers[vl->used]);
- pb_reference(&vl->buffers[vl->used], buf);
+ assert(!vl->entries[vl->used].buf);
+ pb_reference(&vl->entries[vl->used].buf, buf);
+ vl->entries[vl->used].flags = flags;
++vl->used;
return PIPE_OK;
@@ -100,10 +114,36 @@ pb_validate_add_buffer(struct pb_validate *vl,
enum pipe_error
+pb_validate_foreach(struct pb_validate *vl,
+ enum pipe_error (*callback)(struct pb_buffer *buf, void *data),
+ void *data)
+{
+ unsigned i;
+ for(i = 0; i < vl->used; ++i) {
+ enum pipe_error ret;
+ ret = callback(vl->entries[i].buf, data);
+ if(ret != PIPE_OK)
+ return ret;
+ }
+ return PIPE_OK;
+}
+
+
+enum pipe_error
pb_validate_validate(struct pb_validate *vl)
{
- /* FIXME: go through each buffer, ensure its not mapped, its address is
- * available -- requires a new pb_buffer interface */
+ unsigned i;
+
+ for(i = 0; i < vl->used; ++i) {
+ enum pipe_error ret;
+ ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags);
+ if(ret != PIPE_OK) {
+ while(i--)
+ pb_validate(vl->entries[i].buf, NULL, 0);
+ return ret;
+ }
+ }
+
return PIPE_OK;
}
@@ -114,8 +154,8 @@ pb_validate_fence(struct pb_validate *vl,
{
unsigned i;
for(i = 0; i < vl->used; ++i) {
- buffer_fence(vl->buffers[i], fence);
- pb_reference(&vl->buffers[i], NULL);
+ pb_fence(vl->entries[i].buf, fence);
+ pb_reference(&vl->entries[i].buf, NULL);
}
vl->used = 0;
}
@@ -126,8 +166,8 @@ pb_validate_destroy(struct pb_validate *vl)
{
unsigned i;
for(i = 0; i < vl->used; ++i)
- pb_reference(&vl->buffers[i], NULL);
- FREE(vl->buffers);
+ pb_reference(&vl->entries[i].buf, NULL);
+ FREE(vl->entries);
FREE(vl);
}
@@ -142,8 +182,8 @@ pb_validate_create()
return NULL;
vl->size = PB_VALIDATE_INITIAL_SIZE;
- vl->buffers = (struct pb_buffer **)CALLOC(vl->size, sizeof(struct pb_buffer *));
- if(!vl->buffers) {
+ vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry));
+ if(!vl->entries) {
FREE(vl);
return NULL;
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h
index 3db1d5330b..dfb84df1ce 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_validate.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h
@@ -58,7 +58,13 @@ struct pb_validate;
enum pipe_error
pb_validate_add_buffer(struct pb_validate *vl,
- struct pb_buffer *buf);
+ struct pb_buffer *buf,
+ unsigned flags);
+
+enum pipe_error
+pb_validate_foreach(struct pb_validate *vl,
+ enum pipe_error (*callback)(struct pb_buffer *buf, void *data),
+ void *data);
/**
* Validate all buffers for hardware access.
@@ -71,7 +77,7 @@ pb_validate_validate(struct pb_validate *vl);
/**
* Fence all buffers and clear the list.
*
- * Should be called right before issuing commands to the hardware.
+ * Should be called right after issuing commands to the hardware.
*/
void
pb_validate_fence(struct pb_validate *vl,
diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c
deleted file mode 100644
index 28d137dbc4..0000000000
--- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Implementation of client buffer (also designated as "user buffers"), which
- * are just state-tracker owned data masqueraded as buffers.
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
- */
-
-
-#include "pipe/p_winsys.h"
-#include "util/u_memory.h"
-
-#include "pb_buffer.h"
-
-
-/**
- * User buffers are special buffers that initially reference memory
- * held by the user but which may if necessary copy that memory into
- * device memory behind the scenes, for submission to hardware.
- *
- * These are particularly useful when the referenced data is never
- * submitted to hardware at all, in the particular case of software
- * vertex processing.
- */
-struct pb_user_buffer
-{
- struct pb_buffer base;
- void *data;
-};
-
-
-extern const struct pb_vtbl pb_user_buffer_vtbl;
-
-
-static INLINE struct pb_user_buffer *
-pb_user_buffer(struct pb_buffer *buf)
-{
- assert(buf);
- assert(buf->vtbl == &pb_user_buffer_vtbl);
- return (struct pb_user_buffer *)buf;
-}
-
-
-static void
-pb_user_buffer_destroy(struct pb_buffer *buf)
-{
- assert(buf);
- FREE(buf);
-}
-
-
-static void *
-pb_user_buffer_map(struct pb_buffer *buf,
- unsigned flags)
-{
- return pb_user_buffer(buf)->data;
-}
-
-
-static void
-pb_user_buffer_unmap(struct pb_buffer *buf)
-{
- /* No-op */
-}
-
-
-static void
-pb_user_buffer_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- *base_buf = buf;
- *offset = 0;
-}
-
-
-const struct pb_vtbl
-pb_user_buffer_vtbl = {
- pb_user_buffer_destroy,
- pb_user_buffer_map,
- pb_user_buffer_unmap,
- pb_user_buffer_get_base_buffer
-};
-
-
-static struct pipe_buffer *
-pb_winsys_user_buffer_create(struct pipe_winsys *winsys,
- void *data,
- unsigned bytes)
-{
- struct pb_user_buffer *buf = CALLOC_STRUCT(pb_user_buffer);
-
- if(!buf)
- return NULL;
-
- buf->base.base.refcount = 1;
- buf->base.base.size = bytes;
- buf->base.base.alignment = 0;
- buf->base.base.usage = 0;
-
- buf->base.vtbl = &pb_user_buffer_vtbl;
- buf->data = data;
-
- return &buf->base.base;
-}
-
-
-static void *
-pb_winsys_buffer_map(struct pipe_winsys *winsys,
- struct pipe_buffer *buf,
- unsigned flags)
-{
- (void)winsys;
- return pb_map(pb_buffer(buf), flags);
-}
-
-
-static void
-pb_winsys_buffer_unmap(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- (void)winsys;
- pb_unmap(pb_buffer(buf));
-}
-
-
-static void
-pb_winsys_buffer_destroy(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- (void)winsys;
- pb_destroy(pb_buffer(buf));
-}
-
-
-void
-pb_init_winsys(struct pipe_winsys *winsys)
-{
- winsys->user_buffer_create = pb_winsys_user_buffer_create;
- winsys->buffer_map = pb_winsys_buffer_map;
- winsys->buffer_unmap = pb_winsys_buffer_unmap;
- winsys->buffer_destroy = pb_winsys_buffer_destroy;
-}
diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile
index 39b8a4dbd7..252dc5274a 100644
--- a/src/gallium/auxiliary/rtasm/Makefile
+++ b/src/gallium/auxiliary/rtasm/Makefile
@@ -7,6 +7,7 @@ C_SOURCES = \
rtasm_cpu.c \
rtasm_execmem.c \
rtasm_x86sse.c \
+ rtasm_ppc.c \
rtasm_ppc_spe.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript
index 8ea25922aa..eb48368acc 100644
--- a/src/gallium/auxiliary/rtasm/SConscript
+++ b/src/gallium/auxiliary/rtasm/SConscript
@@ -6,6 +6,7 @@ rtasm = env.ConvenienceLibrary(
'rtasm_cpu.c',
'rtasm_execmem.c',
'rtasm_x86sse.c',
+ 'rtasm_ppc.c',
'rtasm_ppc_spe.c',
])
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index 5499018b21..03bdd47238 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -26,7 +26,7 @@
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "rtasm_cpu.h"
diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
index f16191cb61..5acc5bcb7b 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -31,19 +31,20 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_thread.h"
#include "util/u_memory.h"
#include "rtasm_execmem.h"
-#if defined(__linux__)
+#if defined(PIPE_OS_LINUX)
+
/*
* Allocate a large block of memory which can hold code then dole it out
* in pieces by means of the generic memory manager code.
-*/
+ */
#include <unistd.h>
#include <sys/mman.h>
@@ -62,7 +63,7 @@ static void
init_heap(void)
{
if (!exec_heap)
- exec_heap = mmInit( 0, EXEC_HEAP_SIZE );
+ exec_heap = u_mmInit( 0, EXEC_HEAP_SIZE );
if (!exec_mem)
exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE,
@@ -83,7 +84,7 @@ rtasm_exec_malloc(size_t size)
if (exec_heap) {
size = (size + 31) & ~31; /* next multiple of 32 bytes */
- block = mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */
+ block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */
}
if (block)
@@ -103,17 +104,17 @@ rtasm_exec_free(void *addr)
pipe_mutex_lock(exec_mutex);
if (exec_heap) {
- struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem);
+ struct mem_block *block = u_mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem);
if (block)
- mmFreeMem(block);
+ u_mmFreeMem(block);
}
pipe_mutex_unlock(exec_mutex);
}
-#else
+#else /* PIPE_OS_LINUX */
/*
* Just use regular memory.
@@ -133,4 +134,4 @@ rtasm_exec_free(void *addr)
}
-#endif
+#endif /* PIPE_OS_LINUX */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
new file mode 100644
index 0000000000..e3586482db
--- /dev/null
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
@@ -0,0 +1,1077 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * PPC code generation.
+ * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
+ * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
+ *
+ * Other PPC refs:
+ * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
+ * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
+ * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
+ *
+ * \author Brian Paul
+ */
+
+
+#include <stdio.h>
+#include "util/u_memory.h"
+#include "util/u_debug.h"
+#include "rtasm_execmem.h"
+#include "rtasm_ppc.h"
+
+
+void
+ppc_init_func(struct ppc_function *p)
+{
+ uint i;
+
+ memset(p, 0, sizeof(*p));
+
+ p->num_inst = 0;
+ p->max_inst = 100; /* first guess at buffer size */
+ p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
+ p->reg_used = 0x0;
+ p->fp_used = 0x0;
+ p->vec_used = 0x0;
+
+ p->print = FALSE;
+ p->indent = 0;
+
+ /* only allow using gp registers 3..12 for now */
+ for (i = 0; i < 3; i++)
+ ppc_reserve_register(p, i);
+ for (i = 12; i < PPC_NUM_REGS; i++)
+ ppc_reserve_register(p, i);
+}
+
+
+void
+ppc_release_func(struct ppc_function *p)
+{
+ assert(p->num_inst <= p->max_inst);
+ if (p->store != NULL) {
+ rtasm_exec_free(p->store);
+ }
+ p->store = NULL;
+}
+
+
+uint
+ppc_num_instructions(const struct ppc_function *p)
+{
+ return p->num_inst;
+}
+
+
+void (*ppc_get_func(struct ppc_function *p))(void)
+{
+#if 0
+ DUMP_END();
+ if (DISASSEM && p->store)
+ debug_printf("disassemble %p %p\n", p->store, p->csr);
+
+ if (p->store == p->error_overflow)
+ return (void (*)(void)) NULL;
+ else
+#endif
+ return (void (*)(void)) p->store;
+}
+
+
+void
+ppc_dump_func(const struct ppc_function *p)
+{
+ uint i;
+ for (i = 0; i < p->num_inst; i++) {
+ debug_printf("%3u: 0x%08x\n", i, p->store[i]);
+ }
+}
+
+
+void
+ppc_print_code(struct ppc_function *p, boolean enable)
+{
+ p->print = enable;
+}
+
+
+void
+ppc_indent(struct ppc_function *p, int spaces)
+{
+ p->indent += spaces;
+}
+
+
+static void
+indent(const struct ppc_function *p)
+{
+ int i;
+ for (i = 0; i < p->indent; i++) {
+ putchar(' ');
+ }
+}
+
+
+void
+ppc_comment(struct ppc_function *p, int rel_indent, const char *s)
+{
+ if (p->print) {
+ p->indent += rel_indent;
+ indent(p);
+ p->indent -= rel_indent;
+ printf("# %s\n", s);
+ }
+}
+
+
+/**
+ * Mark a register as being unavailable.
+ */
+int
+ppc_reserve_register(struct ppc_function *p, int reg)
+{
+ assert(reg < PPC_NUM_REGS);
+ p->reg_used |= (1 << reg);
+ return reg;
+}
+
+
+/**
+ * Allocate a general purpose register.
+ * \return register index or -1 if none left.
+ */
+int
+ppc_allocate_register(struct ppc_function *p)
+{
+ unsigned i;
+ for (i = 0; i < PPC_NUM_REGS; i++) {
+ const uint64_t mask = 1 << i;
+ if ((p->reg_used & mask) == 0) {
+ p->reg_used |= mask;
+ return i;
+ }
+ }
+ printf("OUT OF PPC registers!\n");
+ return -1;
+}
+
+
+/**
+ * Mark the given general purpose register as "unallocated".
+ */
+void
+ppc_release_register(struct ppc_function *p, int reg)
+{
+ assert(reg < PPC_NUM_REGS);
+ assert(p->reg_used & (1 << reg));
+ p->reg_used &= ~(1 << reg);
+}
+
+
+/**
+ * Allocate a floating point register.
+ * \return register index or -1 if none left.
+ */
+int
+ppc_allocate_fp_register(struct ppc_function *p)
+{
+ unsigned i;
+ for (i = 0; i < PPC_NUM_FP_REGS; i++) {
+ const uint64_t mask = 1 << i;
+ if ((p->fp_used & mask) == 0) {
+ p->fp_used |= mask;
+ return i;
+ }
+ }
+ printf("OUT OF PPC FP registers!\n");
+ return -1;
+}
+
+
+/**
+ * Mark the given floating point register as "unallocated".
+ */
+void
+ppc_release_fp_register(struct ppc_function *p, int reg)
+{
+ assert(reg < PPC_NUM_FP_REGS);
+ assert(p->fp_used & (1 << reg));
+ p->fp_used &= ~(1 << reg);
+}
+
+
+/**
+ * Allocate a vector register.
+ * \return register index or -1 if none left.
+ */
+int
+ppc_allocate_vec_register(struct ppc_function *p)
+{
+ unsigned i;
+ for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
+ const uint64_t mask = 1 << i;
+ if ((p->vec_used & mask) == 0) {
+ p->vec_used |= mask;
+ return i;
+ }
+ }
+ printf("OUT OF PPC VEC registers!\n");
+ return -1;
+}
+
+
+/**
+ * Mark the given vector register as "unallocated".
+ */
+void
+ppc_release_vec_register(struct ppc_function *p, int reg)
+{
+ assert(reg < PPC_NUM_VEC_REGS);
+ assert(p->vec_used & (1 << reg));
+ p->vec_used &= ~(1 << reg);
+}
+
+
+/**
+ * Append instruction to instruction buffer. Grow buffer if out of room.
+ */
+static void
+emit_instruction(struct ppc_function *p, uint32_t inst_bits)
+{
+ if (!p->store)
+ return; /* out of memory, drop the instruction */
+
+ if (p->num_inst == p->max_inst) {
+ /* allocate larger buffer */
+ uint32_t *newbuf;
+ p->max_inst *= 2; /* 2x larger */
+ newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
+ if (newbuf) {
+ memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
+ }
+ rtasm_exec_free(p->store);
+ p->store = newbuf;
+ if (!p->store) {
+ /* out of memory */
+ p->num_inst = 0;
+ return;
+ }
+ }
+
+ p->store[p->num_inst++] = inst_bits;
+}
+
+
+union vx_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned vD:5;
+ unsigned vA:5;
+ unsigned vB:5;
+ unsigned op2:11;
+ } inst;
+};
+
+static INLINE void
+emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
+ const char *format, boolean transpose)
+{
+ union vx_inst inst;
+ inst.inst.op = 4;
+ inst.inst.vD = vD;
+ inst.inst.vA = vA;
+ inst.inst.vB = vB;
+ inst.inst.op2 = op2;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ if (transpose)
+ printf(format, vD, vB, vA);
+ else
+ printf(format, vD, vA, vB);
+ }
+}
+
+
+union vxr_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned vD:5;
+ unsigned vA:5;
+ unsigned vB:5;
+ unsigned rC:1;
+ unsigned op2:10;
+ } inst;
+};
+
+static INLINE void
+emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
+ const char *format)
+{
+ union vxr_inst inst;
+ inst.inst.op = 4;
+ inst.inst.vD = vD;
+ inst.inst.vA = vA;
+ inst.inst.vB = vB;
+ inst.inst.rC = 0;
+ inst.inst.op2 = op2;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, vD, vA, vB);
+ }
+}
+
+
+union va_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned vD:5;
+ unsigned vA:5;
+ unsigned vB:5;
+ unsigned vC:5;
+ unsigned op2:6;
+ } inst;
+};
+
+static INLINE void
+emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC,
+ const char *format)
+{
+ union va_inst inst;
+ inst.inst.op = 4;
+ inst.inst.vD = vD;
+ inst.inst.vA = vA;
+ inst.inst.vB = vB;
+ inst.inst.vC = vC;
+ inst.inst.op2 = op2;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, vD, vA, vB, vC);
+ }
+}
+
+
+union i_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned li:24;
+ unsigned aa:1;
+ unsigned lk:1;
+ } inst;
+};
+
+static INLINE void
+emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
+{
+ union i_inst inst;
+ inst.inst.op = op;
+ inst.inst.li = li;
+ inst.inst.aa = aa;
+ inst.inst.lk = lk;
+ emit_instruction(p, inst.bits);
+}
+
+
+union xl_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned bo:5;
+ unsigned bi:5;
+ unsigned unused:3;
+ unsigned bh:2;
+ unsigned op2:10;
+ unsigned lk:1;
+ } inst;
+};
+
+static INLINE void
+emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
+ uint op2, uint lk)
+{
+ union xl_inst inst;
+ inst.inst.op = op;
+ inst.inst.bo = bo;
+ inst.inst.bi = bi;
+ inst.inst.unused = 0x0;
+ inst.inst.bh = bh;
+ inst.inst.op2 = op2;
+ inst.inst.lk = lk;
+ emit_instruction(p, inst.bits);
+}
+
+static INLINE void
+dump_xl(const char *name, uint inst)
+{
+ union xl_inst i;
+
+ i.bits = inst;
+ debug_printf("%s = 0x%08x\n", name, inst);
+ debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op);
+ debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo);
+ debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi);
+ debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused);
+ debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh);
+ debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2);
+ debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk);
+}
+
+
+union x_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned vrs:5;
+ unsigned ra:5;
+ unsigned rb:5;
+ unsigned op2:10;
+ unsigned unused:1;
+ } inst;
+};
+
+static INLINE void
+emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2,
+ const char *format)
+{
+ union x_inst inst;
+ inst.inst.op = op;
+ inst.inst.vrs = vrs;
+ inst.inst.ra = ra;
+ inst.inst.rb = rb;
+ inst.inst.op2 = op2;
+ inst.inst.unused = 0x0;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, vrs, ra, rb);
+ }
+}
+
+
+union d_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned rt:5;
+ unsigned ra:5;
+ unsigned si:16;
+ } inst;
+};
+
+static INLINE void
+emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si,
+ const char *format, boolean transpose)
+{
+ union d_inst inst;
+ assert(si >= -32768);
+ assert(si <= 32767);
+ inst.inst.op = op;
+ inst.inst.rt = rt;
+ inst.inst.ra = ra;
+ inst.inst.si = (unsigned) (si & 0xffff);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ if (transpose)
+ printf(format, rt, si, ra);
+ else
+ printf(format, rt, ra, si);
+ }
+}
+
+
+union a_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned frt:5;
+ unsigned fra:5;
+ unsigned frb:5;
+ unsigned unused:5;
+ unsigned op2:5;
+ unsigned rc:1;
+ } inst;
+};
+
+static INLINE void
+emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
+ uint rc, const char *format)
+{
+ union a_inst inst;
+ inst.inst.op = op;
+ inst.inst.frt = frt;
+ inst.inst.fra = fra;
+ inst.inst.frb = frb;
+ inst.inst.unused = 0x0;
+ inst.inst.op2 = op2;
+ inst.inst.rc = rc;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, frt, fra, frb);
+ }
+}
+
+
+union xo_inst {
+ uint32_t bits;
+ struct {
+ unsigned op:6;
+ unsigned rt:5;
+ unsigned ra:5;
+ unsigned rb:5;
+ unsigned oe:1;
+ unsigned op2:9;
+ unsigned rc:1;
+ } inst;
+};
+
+static INLINE void
+emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
+ uint op2, uint rc, const char *format)
+{
+ union xo_inst inst;
+ inst.inst.op = op;
+ inst.inst.rt = rt;
+ inst.inst.ra = ra;
+ inst.inst.rb = rb;
+ inst.inst.oe = oe;
+ inst.inst.op2 = op2;
+ inst.inst.rc = rc;
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf(format, rt, ra, rb);
+ }
+}
+
+
+
+
+
+/**
+ ** float vector arithmetic
+ **/
+
+/** vector float add */
+void
+ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 10, vD, vA, vB, "vaddfp\t%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float substract */
+void
+ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float min */
+void
+ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float max */
+void
+ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float mult add: vD = vA * vB + vC */
+void
+ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+ /* note arg order */
+ emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n");
+}
+
+/** vector float negative mult subtract: vD = vA - vB * vC */
+void
+ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+ /* note arg order */
+ emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n");
+}
+
+/** vector float compare greater than */
+void
+ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u");
+}
+
+/** vector float compare greater than or equal to */
+void
+ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u");
+}
+
+/** vector float compare equal */
+void
+ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u");
+}
+
+/** vector float 2^x */
+void
+ppc_vexptefp(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float log2(x) */
+void
+ppc_vlogefp(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float reciprocol */
+void
+ppc_vrefp(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float reciprocol sqrt estimate */
+void
+ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to negative infinity */
+void
+ppc_vrfim(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to positive infinity */
+void
+ppc_vrfip(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to nearest int */
+void
+ppc_vrfin(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to int toward zero */
+void
+ppc_vrfiz(struct ppc_function *p, uint vD, uint vB)
+{
+ emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector store: store vR at mem[rA+rB] */
+void
+ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB)
+{
+ emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n");
+}
+
+/** vector load: vR = mem[rA+rB] */
+void
+ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB)
+{
+ emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n");
+}
+
+/** load vector element word: vR = mem_word[ra+rb] */
+void
+ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB)
+{
+ emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n");
+}
+
+
+
+
+/**
+ ** vector bitwise operations
+ **/
+
+/** vector and */
+void
+ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector and complement */
+void
+ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector or */
+void
+ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector nor */
+void
+ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector xor */
+void
+ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** Pseudo-instruction: vector move */
+void
+ppc_vmove(struct ppc_function *p, uint vD, uint vA)
+{
+ boolean print = p->print;
+ p->print = FALSE;
+ ppc_vor(p, vD, vA, vA);
+ if (print) {
+ indent(p);
+ printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA);
+ }
+ p->print = print;
+}
+
+/** Set vector register to {0,0,0,0} */
+void
+ppc_vzero(struct ppc_function *p, uint vr)
+{
+ boolean print = p->print;
+ p->print = FALSE;
+ ppc_vxor(p, vr, vr, vr);
+ if (print) {
+ indent(p);
+ printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr);
+ }
+ p->print = print;
+}
+
+
+
+
+/**
+ ** Vector shuffle / select / splat / etc
+ **/
+
+/** vector permute */
+void
+ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+ emit_va(p, 43, vD, vA, vB, vC, "vperm\tr%u, r%u, r%u, r%u");
+}
+
+/** vector select */
+void
+ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+ emit_va(p, 42, vD, vA, vB, vC, "vsel\tr%u, r%u, r%u, r%u");
+}
+
+/** vector splat byte */
+void
+ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm)
+{
+ emit_vx(p, 42, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE);
+}
+
+/** vector splat half word */
+void
+ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm)
+{
+ emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE);
+}
+
+/** vector splat word */
+void
+ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm)
+{
+ emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE);
+}
+
+/** vector splat signed immediate word */
+void
+ppc_vspltisw(struct ppc_function *p, uint vD, int imm)
+{
+ assert(imm >= -16);
+ assert(imm < 15);
+ emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE);
+}
+
+/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
+void
+ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+ emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE);
+}
+
+
+
+
+/**
+ ** integer arithmetic
+ **/
+
+/** rt = ra + imm */
+void
+ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra + (imm << 16) */
+void
+ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra + rb */
+void
+ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+ emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n");
+}
+
+/** rt = ra AND ra */
+void
+ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+ emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n"); /* note argument order */
+}
+
+/** rt = ra AND imm */
+void
+ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ /* note argument order */
+ emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra OR ra */
+void
+ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+ emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n"); /* note argument order */
+}
+
+/** rt = ra OR imm */
+void
+ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ /* note argument order */
+ emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra XOR ra */
+void
+ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+ emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n"); /* note argument order */
+}
+
+/** rt = ra XOR imm */
+void
+ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+ /* note argument order */
+ emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE);
+}
+
+/** pseudo instruction: move: rt = ra */
+void
+ppc_mr(struct ppc_function *p, uint rt, uint ra)
+{
+ ppc_or(p, rt, ra, ra);
+}
+
+/** pseudo instruction: load immediate: rt = imm */
+void
+ppc_li(struct ppc_function *p, uint rt, int imm)
+{
+ boolean print = p->print;
+ p->print = FALSE;
+ ppc_addi(p, rt, 0, imm);
+ if (print) {
+ indent(p);
+ printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm);
+ }
+ p->print = print;
+}
+
+/** rt = imm << 16 */
+void
+ppc_lis(struct ppc_function *p, uint rt, int imm)
+{
+ ppc_addis(p, rt, 0, imm);
+}
+
+/** rt = imm */
+void
+ppc_load_int(struct ppc_function *p, uint rt, int imm)
+{
+ ppc_lis(p, rt, (imm >> 16)); /* rt = imm >> 16 */
+ ppc_ori(p, rt, rt, (imm & 0xffff)); /* rt = rt | (imm & 0xffff) */
+}
+
+
+
+
+/**
+ ** integer load/store
+ **/
+
+/** store rs at memory[(ra)+d],
+ * then update ra = (ra)+d
+ */
+void
+ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d)
+{
+ emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE);
+}
+
+/** store rs at memory[(ra)+d] */
+void
+ppc_stw(struct ppc_function *p, uint rs, uint ra, int d)
+{
+ emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE);
+}
+
+/** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero. */
+void
+ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d)
+{
+ emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE);
+}
+
+
+
+/**
+ ** Float (non-vector) arithmetic
+ **/
+
+/** add: frt = fra + frb */
+void
+ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb)
+{
+ emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n");
+}
+
+/** sub: frt = fra - frb */
+void
+ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb)
+{
+ emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n");
+}
+
+/** convert to int: rt = (int) ra */
+void
+ppc_fctiwz(struct ppc_function *p, uint rt, uint fra)
+{
+ emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n");
+}
+
+/** store frs at mem[(ra)+offset] */
+void
+ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset)
+{
+ emit_d(p, 52, frs, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
+}
+
+/** store frs at mem[(ra)+(rb)] */
+void
+ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb)
+{
+ emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tr%u, r%u, r%u\n");
+}
+
+/** load frt = mem[(ra)+offset] */
+void
+ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset)
+{
+ emit_d(p, 48, frt, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
+}
+
+
+
+
+
+/**
+ ** branch instructions
+ **/
+
+/** BLR: Branch to link register (p. 35) */
+void
+ppc_blr(struct ppc_function *p)
+{
+ emit_i(p, 18, 0, 0, 1);
+ if (p->print) {
+ indent(p);
+ printf("blr\n");
+ }
+}
+
+/** Branch Conditional to Link Register (p. 36) */
+void
+ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg)
+{
+ emit_xl(p, 19, condOp, condReg, branchHint, 16, 0);
+ if (p->print) {
+ indent(p);
+ printf("bclr\t%u %u %u\n", condOp, branchHint, condReg);
+ }
+}
+
+/** Pseudo instruction: return from subroutine */
+void
+ppc_return(struct ppc_function *p)
+{
+ ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0);
+}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
new file mode 100644
index 0000000000..93e5f5187d
--- /dev/null
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
@@ -0,0 +1,342 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * PPC code generation.
+ * \author Brian Paul
+ */
+
+
+#ifndef RTASM_PPC_H
+#define RTASM_PPC_H
+
+
+#include "pipe/p_compiler.h"
+
+
+#define PPC_INST_SIZE 4 /**< 4 bytes / instruction */
+
+#define PPC_NUM_REGS 32
+#define PPC_NUM_FP_REGS 32
+#define PPC_NUM_VEC_REGS 32
+
+/** Stack pointer register */
+#define PPC_REG_SP 1
+
+/** Branch conditions */
+#define BRANCH_COND_ALWAYS 0x14 /* binary 1z1zz (z=ignored) */
+
+/** Branch hints */
+#define BRANCH_HINT_SUB_RETURN 0x0 /* binary 00 */
+
+
+struct ppc_function
+{
+ uint32_t *store; /**< instruction buffer */
+ uint num_inst;
+ uint max_inst;
+ uint32_t reg_used; /** used/free general-purpose registers bitmask */
+ uint32_t fp_used; /** used/free floating point registers bitmask */
+ uint32_t vec_used; /** used/free vector registers bitmask */
+ int indent;
+ boolean print;
+};
+
+
+
+extern void ppc_init_func(struct ppc_function *p);
+extern void ppc_release_func(struct ppc_function *p);
+extern uint ppc_num_instructions(const struct ppc_function *p);
+extern void (*ppc_get_func( struct ppc_function *p ))( void );
+extern void ppc_dump_func(const struct ppc_function *p);
+
+extern void ppc_print_code(struct ppc_function *p, boolean enable);
+extern void ppc_indent(struct ppc_function *p, int spaces);
+extern void ppc_comment(struct ppc_function *p, int rel_indent, const char *s);
+
+extern int ppc_reserve_register(struct ppc_function *p, int reg);
+extern int ppc_allocate_register(struct ppc_function *p);
+extern void ppc_release_register(struct ppc_function *p, int reg);
+extern int ppc_allocate_fp_register(struct ppc_function *p);
+extern void ppc_release_fp_register(struct ppc_function *p, int reg);
+extern int ppc_allocate_vec_register(struct ppc_function *p);
+extern void ppc_release_vec_register(struct ppc_function *p, int reg);
+
+
+
+/**
+ ** float vector arithmetic
+ **/
+
+/** vector float add */
+extern void
+ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB);
+
+/** vector float substract */
+extern void
+ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float min */
+extern void
+ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float max */
+extern void
+ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float mult add: vD = vA * vB + vC */
+extern void
+ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector float negative mult subtract: vD = vA - vB * vC */
+extern void
+ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector float compare greater than */
+extern void
+ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float compare greater than or equal to */
+extern void
+ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float compare equal */
+extern void
+ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float 2^x */
+extern void
+ppc_vexptefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float log2(x) */
+extern void
+ppc_vlogefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float reciprocol */
+extern void
+ppc_vrefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float reciprocol sqrt estimate */
+extern void
+ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to negative infinity */
+extern void
+ppc_vrfim(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to positive infinity */
+extern void
+ppc_vrfip(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to nearest int */
+extern void
+ppc_vrfin(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to int toward zero */
+extern void
+ppc_vrfiz(struct ppc_function *p, uint vD, uint vB);
+
+
+/** vector store: store vR at mem[vA+vB] */
+extern void
+ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB);
+
+/** vector load: vR = mem[vA+vB] */
+extern void
+ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB);
+
+/** load vector element word: vR = mem_word[vA+vB] */
+extern void
+ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB);
+
+
+
+/**
+ ** vector bitwise operations
+ **/
+
+
+/** vector and */
+extern void
+ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector and complement */
+extern void
+ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector or */
+extern void
+ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector nor */
+extern void
+ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector xor */
+extern void
+ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** Pseudo-instruction: vector move */
+extern void
+ppc_vmove(struct ppc_function *p, uint vD, uint vA);
+
+/** Set vector register to {0,0,0,0} */
+extern void
+ppc_vzero(struct ppc_function *p, uint vr);
+
+
+
+/**
+ ** Vector shuffle / select / splat / etc
+ **/
+
+/** vector permute */
+extern void
+ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector select */
+extern void
+ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector splat byte */
+extern void
+ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm);
+
+/** vector splat half word */
+extern void
+ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm);
+
+/** vector splat word */
+extern void
+ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm);
+
+/** vector splat signed immediate word */
+extern void
+ppc_vspltisw(struct ppc_function *p, uint vD, int imm);
+
+/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
+extern void
+ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+
+
+/**
+ ** scalar arithmetic
+ **/
+
+extern void
+ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+extern void
+ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+extern void
+ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+extern void
+ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+extern void
+ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm);
+
+extern void
+ppc_mr(struct ppc_function *p, uint rt, uint ra);
+
+extern void
+ppc_li(struct ppc_function *p, uint rt, int imm);
+
+extern void
+ppc_lis(struct ppc_function *p, uint rt, int imm);
+
+extern void
+ppc_load_int(struct ppc_function *p, uint rt, int imm);
+
+
+
+/**
+ ** scalar load/store
+ **/
+
+extern void
+ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d);
+
+extern void
+ppc_stw(struct ppc_function *p, uint rs, uint ra, int d);
+
+extern void
+ppc_lwz(struct ppc_function *p, uint rs, uint ra, int d);
+
+
+
+/**
+ ** Float (non-vector) arithmetic
+ **/
+
+extern void
+ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb);
+
+extern void
+ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb);
+
+extern void
+ppc_fctiwz(struct ppc_function *p, uint rt, uint ra);
+
+extern void
+ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset);
+
+extern void
+ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb);
+
+extern void
+ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset);
+
+
+
+/**
+ ** branch instructions
+ **/
+
+extern void
+ppc_blr(struct ppc_function *p);
+
+void
+ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg);
+
+extern void
+ppc_return(struct ppc_function *p);
+
+
+#endif /* RTASM_PPC_H */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index a04cc6c4ff..53a0e722cf 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -27,12 +27,16 @@
* Real-time assembly generation interface for Cell B.E. SPEs.
*
* \author Ian Romanick <idr@us.ibm.com>
+ * \author Brian Paul
*/
+
+#include <stdio.h>
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "rtasm_ppc_spe.h"
+
#ifdef GALLIUM_CELL
/**
* SPE instruction types
@@ -143,21 +147,91 @@ union spe_inst_RI18 {
/*@}*/
-static void emit_RR(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, unsigned rB)
+static void
+indent(const struct spe_function *p)
+{
+ int i;
+ for (i = 0; i < p->indent; i++) {
+ putchar(' ');
+ }
+}
+
+
+static const char *
+rem_prefix(const char *longname)
+{
+ return longname + 4;
+}
+
+
+static const char *
+reg_name(int reg)
+{
+ switch (reg) {
+ case SPE_REG_SP:
+ return "$sp";
+ case SPE_REG_RA:
+ return "$lr";
+ default:
+ {
+ /* cycle through four buffers to handle multiple calls per printf */
+ static char buf[4][10];
+ static int b = 0;
+ b = (b + 1) % 4;
+ sprintf(buf[b], "$%d", reg);
+ return buf[b];
+ }
+ }
+}
+
+
+static void
+emit_instruction(struct spe_function *p, uint32_t inst_bits)
+{
+ if (!p->store)
+ return; /* out of memory, drop the instruction */
+
+ if (p->num_inst == p->max_inst) {
+ /* allocate larger buffer */
+ uint32_t *newbuf;
+ p->max_inst *= 2; /* 2x larger */
+ newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16);
+ if (newbuf) {
+ memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE);
+ }
+ align_free(p->store);
+ p->store = newbuf;
+ if (!p->store) {
+ /* out of memory */
+ p->num_inst = 0;
+ return;
+ }
+ }
+
+ p->store[p->num_inst++] = inst_bits;
+}
+
+
+
+static void emit_RR(struct spe_function *p, unsigned op, int rT,
+ int rA, int rB, const char *name)
{
union spe_inst_RR inst;
inst.inst.op = op;
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, %s\n",
+ rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB));
+ }
}
-static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, unsigned rB, unsigned rC)
+static void emit_RRR(struct spe_function *p, unsigned op, int rT,
+ int rA, int rB, int rC, const char *name)
{
union spe_inst_RRR inst;
inst.inst.op = op;
@@ -165,155 +239,212 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rC = rC;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT),
+ reg_name(rA), reg_name(rB), reg_name(rC));
+ }
}
-static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, int imm)
+static void emit_RI7(struct spe_function *p, unsigned op, int rT,
+ int rA, int imm, const char *name)
{
union spe_inst_RI7 inst;
inst.inst.op = op;
inst.inst.i7 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, 0x%x\n",
+ rem_prefix(name), reg_name(rT), reg_name(rA), imm);
+ }
}
-static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, int imm)
+static void emit_RI8(struct spe_function *p, unsigned op, int rT,
+ int rA, int imm, const char *name)
{
union spe_inst_RI8 inst;
inst.inst.op = op;
inst.inst.i8 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, 0x%x\n",
+ rem_prefix(name), reg_name(rT), reg_name(rA), imm);
+ }
}
-static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT,
- unsigned rA, int imm)
+static void emit_RI10(struct spe_function *p, unsigned op, int rT,
+ int rA, int imm, const char *name)
{
union spe_inst_RI10 inst;
inst.inst.op = op;
inst.inst.i10 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, %s, 0x%x\n",
+ rem_prefix(name), reg_name(rT), reg_name(rA), imm);
+ }
}
-static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT,
- int imm)
+/** As above, but do range checking on signed immediate value */
+static void emit_RI10s(struct spe_function *p, unsigned op, int rT,
+ int rA, int imm, const char *name)
+{
+ assert(imm <= 511);
+ assert(imm >= -512);
+ emit_RI10(p, op, rT, rA, imm, name);
+}
+
+
+static void emit_RI16(struct spe_function *p, unsigned op, int rT,
+ int imm, const char *name)
{
union spe_inst_RI16 inst;
inst.inst.op = op;
inst.inst.i16 = imm;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
+ }
}
-static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT,
- int imm)
+static void emit_RI18(struct spe_function *p, unsigned op, int rT,
+ int imm, const char *name)
{
union spe_inst_RI18 inst;
inst.inst.op = op;
inst.inst.i18 = imm;
inst.inst.rT = rT;
- p->store[p->num_inst++] = inst.bits;
- assert(p->num_inst <= p->max_inst);
+ emit_instruction(p, inst.bits);
+ if (p->print) {
+ indent(p);
+ printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
+ }
}
-
+#define EMIT(_name, _op) \
+void _name (struct spe_function *p) \
+{ \
+ emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \
+}
#define EMIT_(_name, _op) \
-void _name (struct spe_function *p, unsigned rT) \
+void _name (struct spe_function *p, int rT) \
{ \
- emit_RR(p, _op, rT, 0, 0); \
+ emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \
}
#define EMIT_R(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA) \
+void _name (struct spe_function *p, int rT, int rA) \
{ \
- emit_RR(p, _op, rT, rA, 0); \
+ emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \
}
#define EMIT_RR(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \
+void _name (struct spe_function *p, int rT, int rA, int rB) \
{ \
- emit_RR(p, _op, rT, rA, rB); \
+ emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \
}
#define EMIT_RRR(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \
+void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \
{ \
- emit_RRR(p, _op, rT, rA, rB, rC); \
+ emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \
}
#define EMIT_RI7(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \
+void _name (struct spe_function *p, int rT, int rA, int imm) \
{ \
- emit_RI7(p, _op, rT, rA, imm); \
+ emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \
}
#define EMIT_RI8(_name, _op, bias) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \
+void _name (struct spe_function *p, int rT, int rA, int imm) \
{ \
- emit_RI8(p, _op, rT, rA, bias - imm); \
+ emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \
}
#define EMIT_RI10(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \
+void _name (struct spe_function *p, int rT, int rA, int imm) \
{ \
- emit_RI10(p, _op, rT, rA, imm); \
+ emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \
+}
+
+#define EMIT_RI10s(_name, _op) \
+void _name (struct spe_function *p, int rT, int rA, int imm) \
+{ \
+ emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \
}
#define EMIT_RI16(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, int imm) \
+void _name (struct spe_function *p, int rT, int imm) \
{ \
- emit_RI16(p, _op, rT, imm); \
+ emit_RI16(p, _op, rT, imm, __FUNCTION__); \
}
#define EMIT_RI18(_name, _op) \
-void _name (struct spe_function *p, unsigned rT, int imm) \
+void _name (struct spe_function *p, int rT, int imm) \
{ \
- emit_RI18(p, _op, rT, imm); \
+ emit_RI18(p, _op, rT, imm, __FUNCTION__); \
}
#define EMIT_I16(_name, _op) \
void _name (struct spe_function *p, int imm) \
{ \
- emit_RI16(p, _op, 0, imm); \
+ emit_RI16(p, _op, 0, imm, __FUNCTION__); \
}
#include "rtasm_ppc_spe.h"
+
/**
* Initialize an spe_function.
- * \param code_size size of instruction buffer to allocate, in bytes.
+ * \param code_size initial size of instruction buffer to allocate, in bytes.
+ * If zero, use a default.
*/
void spe_init_func(struct spe_function *p, unsigned code_size)
{
- p->store = align_malloc(code_size, 16);
+ uint i;
+
+ if (!code_size)
+ code_size = 64;
+
p->num_inst = 0;
p->max_inst = code_size / SPE_INST_SIZE;
+ p->store = align_malloc(code_size, 16);
+
+ p->set_count = 0;
+ memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0]));
/* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
*/
- p->regs[0] = ~7;
- p->regs[1] = (1U << (80 - 64)) - 1;
+ p->regs[0] = p->regs[1] = p->regs[2] = 1;
+ for (i = 80; i <= 127; i++) {
+ p->regs[i] = 1;
+ }
+
+ p->print = FALSE;
+ p->indent = 0;
}
@@ -327,20 +458,23 @@ void spe_release_func(struct spe_function *p)
}
+/** Return current code size in bytes. */
+unsigned spe_code_size(const struct spe_function *p)
+{
+ return p->num_inst * SPE_INST_SIZE;
+}
+
+
/**
- * Alloate a SPE register.
+ * Allocate a SPE register.
* \return register index or -1 if none left.
*/
int spe_allocate_available_register(struct spe_function *p)
{
unsigned i;
for (i = 0; i < SPE_NUM_REGS; i++) {
- const uint64_t mask = (1ULL << (i % 64));
- const unsigned idx = i / 64;
-
- assert(idx < 2);
- if ((p->regs[idx] & mask) != 0) {
- p->regs[idx] &= ~mask;
+ if (p->regs[i] == 0) {
+ p->regs[i] = 1;
return i;
}
}
@@ -354,31 +488,161 @@ int spe_allocate_available_register(struct spe_function *p)
*/
int spe_allocate_register(struct spe_function *p, int reg)
{
- const unsigned idx = reg / 64;
- const unsigned bit = reg % 64;
-
assert(reg < SPE_NUM_REGS);
- assert((p->regs[idx] & (1ULL << bit)) != 0);
-
- p->regs[idx] &= ~(1ULL << bit);
+ assert(p->regs[reg] == 0);
+ p->regs[reg] = 1;
return reg;
}
/**
- * Mark the given SPE register as "unallocated".
+ * Mark the given SPE register as "unallocated". Note that this should
+ * only be used on registers allocated in the current register set; an
+ * assertion will fail if an attempt is made to deallocate a register
+ * allocated in an earlier register set.
*/
void spe_release_register(struct spe_function *p, int reg)
{
- const unsigned idx = reg / 64;
- const unsigned bit = reg % 64;
+ assert(reg >= 0);
+ assert(reg < SPE_NUM_REGS);
+ assert(p->regs[reg] == 1);
- assert(idx < 2);
+ p->regs[reg] = 0;
+}
- assert(reg < SPE_NUM_REGS);
- assert((p->regs[idx] & (1ULL << bit)) == 0);
+/**
+ * Start a new set of registers. This can be called if
+ * it will be difficult later to determine exactly what
+ * registers were actually allocated during a code generation
+ * sequence, and you really just want to deallocate all of them.
+ */
+void spe_allocate_register_set(struct spe_function *p)
+{
+ uint i;
+
+ /* Keep track of the set count. If it ever wraps around to 0,
+ * we're in trouble.
+ */
+ p->set_count++;
+ assert(p->set_count > 0);
+
+ /* Increment the allocation count of all registers currently
+ * allocated. Then any registers that are allocated in this set
+ * will be the only ones with a count of 1; they'll all be released
+ * when the register set is released.
+ */
+ for (i = 0; i < SPE_NUM_REGS; i++) {
+ if (p->regs[i] > 0)
+ p->regs[i]++;
+ }
+}
+
+void spe_release_register_set(struct spe_function *p)
+{
+ uint i;
+
+ /* If the set count drops below zero, we're in trouble. */
+ assert(p->set_count > 0);
+ p->set_count--;
- p->regs[idx] |= (1ULL << bit);
+ /* Drop the allocation level of all registers. Any allocated
+ * during this register set will drop to 0 and then become
+ * available.
+ */
+ for (i = 0; i < SPE_NUM_REGS; i++) {
+ if (p->regs[i] > 0)
+ p->regs[i]--;
+ }
+}
+
+
+unsigned
+spe_get_registers_used(const struct spe_function *p, ubyte used[])
+{
+ unsigned i, num = 0;
+ /* only count registers in the range available to callers */
+ for (i = 2; i < 80; i++) {
+ if (p->regs[i]) {
+ used[num++] = i;
+ }
+ }
+ return num;
+}
+
+
+void
+spe_print_code(struct spe_function *p, boolean enable)
+{
+ p->print = enable;
+}
+
+
+void
+spe_indent(struct spe_function *p, int spaces)
+{
+ p->indent += spaces;
+}
+
+
+void
+spe_comment(struct spe_function *p, int rel_indent, const char *s)
+{
+ if (p->print) {
+ p->indent += rel_indent;
+ indent(p);
+ p->indent -= rel_indent;
+ printf("# %s\n", s);
+ }
+}
+
+
+/**
+ * Load quad word.
+ * NOTE: offset is in bytes and the least significant 4 bits must be zero!
+ */
+void spe_lqd(struct spe_function *p, int rT, int rA, int offset)
+{
+ const boolean pSave = p->print;
+
+ /* offset must be a multiple of 16 */
+ assert(offset % 16 == 0);
+ /* offset must fit in 10-bit signed int field, after shifting */
+ assert((offset >> 4) <= 511);
+ assert((offset >> 4) >= -512);
+
+ p->print = FALSE;
+ emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd");
+ p->print = pSave;
+
+ if (p->print) {
+ indent(p);
+ printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
+ }
+}
+
+
+/**
+ * Store quad word.
+ * NOTE: offset is in bytes and the least significant 4 bits must be zero!
+ */
+void spe_stqd(struct spe_function *p, int rT, int rA, int offset)
+{
+ const boolean pSave = p->print;
+
+ /* offset must be a multiple of 16 */
+ assert(offset % 16 == 0);
+ /* offset must fit in 10-bit signed int field, after shifting */
+ assert((offset >> 4) <= 511);
+ assert((offset >> 4) >= -512);
+
+ p->print = FALSE;
+ emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd");
+ p->print = pSave;
+
+ if (p->print) {
+ indent(p);
+ printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
+ }
}
@@ -390,53 +654,53 @@ void spe_release_register(struct spe_function *p, int reg)
*/
/** Branch Indirect to address in rA */
-void spe_bi(struct spe_function *p, unsigned rA, int d, int e)
+void spe_bi(struct spe_function *p, int rA, int d, int e)
{
- emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Interupt Return */
-void spe_iret(struct spe_function *p, unsigned rA, int d, int e)
+void spe_iret(struct spe_function *p, int rA, int d, int e)
{
- emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect and set link on external data */
-void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d,
+void spe_bisled(struct spe_function *p, int rT, int rA, int d,
int e)
{
- emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect and set link. Save PC in rT, jump to rA. */
-void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d,
+void spe_bisl(struct spe_function *p, int rT, int rA, int d,
int e)
{
- emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */
-void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
+void spe_biz(struct spe_function *p, int rT, int rA, int d, int e)
{
- emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */
-void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
+void spe_binz(struct spe_function *p, int rT, int rA, int d, int e)
{
- emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */
-void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
+void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e)
{
- emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */
-void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
+void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e)
{
- emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4));
+ emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
@@ -454,7 +718,6 @@ hbrr;
#if 0
stop;
EMIT_RR (spe_stopd, 0x140);
-EMIT_ (spe_lnop, 0x001);
EMIT_ (spe_nop, 0x201);
sync;
EMIT_ (spe_dsync, 0x003);
@@ -471,7 +734,7 @@ EMIT_R (spe_mtspr, 0x10c);
void
-spe_load_float(struct spe_function *p, unsigned rT, float x)
+spe_load_float(struct spe_function *p, int rT, float x)
{
if (x == 0.0f) {
spe_il(p, rT, 0x0);
@@ -498,45 +761,307 @@ spe_load_float(struct spe_function *p, unsigned rT, float x)
void
-spe_load_int(struct spe_function *p, unsigned rT, int i)
+spe_load_int(struct spe_function *p, int rT, int i)
{
if (-32768 <= i && i <= 32767) {
spe_il(p, rT, i);
}
else {
spe_ilhu(p, rT, i >> 16);
- spe_iohl(p, rT, i & 0xffff);
+ if (i & 0xffff)
+ spe_iohl(p, rT, i & 0xffff);
}
}
+void spe_load_uint(struct spe_function *p, int rT, uint ui)
+{
+ /* If the whole value is in the lower 18 bits, use ila, which
+ * doesn't sign-extend. Otherwise, if the two halfwords of
+ * the constant are identical, use ilh. Otherwise, if every byte of
+ * the desired value is 0x00 or 0xff, we can use Form Select Mask for
+ * Bytes Immediate (fsmbi) to load the value in a single instruction.
+ * Otherwise, in the general case, we have to use ilhu followed by iohl.
+ */
+ if ((ui & 0x0003ffff) == ui) {
+ spe_ila(p, rT, ui);
+ }
+ else if ((ui >> 16) == (ui & 0xffff)) {
+ spe_ilh(p, rT, ui & 0xffff);
+ }
+ else if (
+ ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) &&
+ ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) &&
+ ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) &&
+ ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000)
+ ) {
+ uint mask = 0;
+ /* fsmbi duplicates each bit in the given mask eight times,
+ * using a 16-bit value to initialize a 16-byte quadword.
+ * Each 4-bit nybble of the mask corresponds to a full word
+ * of the result; look at the value and figure out the mask
+ * (replicated for each word in the quadword), and then
+ * form the "select mask" to get the value.
+ */
+ if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111;
+ if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222;
+ if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444;
+ if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888;
+ spe_fsmbi(p, rT, mask);
+ }
+ else {
+ /* The general case: this usually uses two instructions, but
+ * may use only one if the low-order 16 bits of each word are 0.
+ */
+ spe_ilhu(p, rT, ui >> 16);
+ if (ui & 0xffff)
+ spe_iohl(p, rT, ui & 0xffff);
+ }
+}
+/**
+ * This function is constructed identically to spe_xor_uint() below.
+ * Changes to one should be made in the other.
+ */
void
-spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
+spe_and_uint(struct spe_function *p, int rT, int rA, uint ui)
{
- spe_ila(p, rT, 66051);
- spe_shufb(p, rT, rA, rA, rT);
+ /* If we can, emit a single instruction, either And Byte Immediate
+ * (which uses the same constant across each byte), And Halfword Immediate
+ * (which sign-extends a 10-bit immediate to 16 bits and uses that
+ * across each halfword), or And Word Immediate (which sign-extends
+ * a 10-bit immediate to 32 bits).
+ *
+ * Otherwise, we'll need to use a temporary register.
+ */
+ uint tmp;
+
+ /* If the upper 23 bits are all 0s or all 1s, sign extension
+ * will work and we can use And Word Immediate
+ */
+ tmp = ui & 0xfffffe00;
+ if (tmp == 0xfffffe00 || tmp == 0) {
+ spe_andi(p, rT, rA, ui & 0x000003ff);
+ return;
+ }
+
+ /* If the ui field is symmetric along halfword boundaries and
+ * the upper 7 bits of each halfword are all 0s or 1s, we
+ * can use And Halfword Immediate
+ */
+ tmp = ui & 0xfe00fe00;
+ if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
+ spe_andhi(p, rT, rA, ui & 0x000003ff);
+ return;
+ }
+
+ /* If the ui field is symmetric in each byte, then we can use
+ * the And Byte Immediate instruction.
+ */
+ tmp = ui & 0x000000ff;
+ if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
+ spe_andbi(p, rT, rA, tmp);
+ return;
+ }
+
+ /* Otherwise, we'll have to use a temporary register. */
+ int tmp_reg = spe_allocate_available_register(p);
+ spe_load_uint(p, tmp_reg, ui);
+ spe_and(p, rT, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
}
+/**
+ * This function is constructed identically to spe_and_uint() above.
+ * Changes to one should be made in the other.
+ */
void
-spe_complement(struct spe_function *p, unsigned rT)
+spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui)
{
- spe_nor(p, rT, rT, rT);
+ /* If we can, emit a single instruction, either Exclusive Or Byte
+ * Immediate (which uses the same constant across each byte), Exclusive
+ * Or Halfword Immediate (which sign-extends a 10-bit immediate to
+ * 16 bits and uses that across each halfword), or Exclusive Or Word
+ * Immediate (which sign-extends a 10-bit immediate to 32 bits).
+ *
+ * Otherwise, we'll need to use a temporary register.
+ */
+ uint tmp;
+
+ /* If the upper 23 bits are all 0s or all 1s, sign extension
+ * will work and we can use Exclusive Or Word Immediate
+ */
+ tmp = ui & 0xfffffe00;
+ if (tmp == 0xfffffe00 || tmp == 0) {
+ spe_xori(p, rT, rA, ui & 0x000003ff);
+ return;
+ }
+
+ /* If the ui field is symmetric along halfword boundaries and
+ * the upper 7 bits of each halfword are all 0s or 1s, we
+ * can use Exclusive Or Halfword Immediate
+ */
+ tmp = ui & 0xfe00fe00;
+ if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
+ spe_xorhi(p, rT, rA, ui & 0x000003ff);
+ return;
+ }
+
+ /* If the ui field is symmetric in each byte, then we can use
+ * the Exclusive Or Byte Immediate instruction.
+ */
+ tmp = ui & 0x000000ff;
+ if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
+ spe_xorbi(p, rT, rA, tmp);
+ return;
+ }
+
+ /* Otherwise, we'll have to use a temporary register. */
+ int tmp_reg = spe_allocate_available_register(p);
+ spe_load_uint(p, tmp_reg, ui);
+ spe_xor(p, rT, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
+}
+
+void
+spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui)
+{
+ /* If the comparison value is 9 bits or less, it fits inside a
+ * Compare Equal Word Immediate instruction.
+ */
+ if ((ui & 0x000001ff) == ui) {
+ spe_ceqi(p, rT, rA, ui);
+ }
+ /* Otherwise, we're going to have to load a word first. */
+ else {
+ int tmp_reg = spe_allocate_available_register(p);
+ spe_load_uint(p, tmp_reg, ui);
+ spe_ceq(p, rT, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
+ }
+}
+
+void
+spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui)
+{
+ /* If the comparison value is 10 bits or less, it fits inside a
+ * Compare Logical Greater Than Word Immediate instruction.
+ */
+ if ((ui & 0x000003ff) == ui) {
+ spe_clgti(p, rT, rA, ui);
+ }
+ /* Otherwise, we're going to have to load a word first. */
+ else {
+ int tmp_reg = spe_allocate_available_register(p);
+ spe_load_uint(p, tmp_reg, ui);
+ spe_clgt(p, rT, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
+ }
+}
+
+void
+spe_splat(struct spe_function *p, int rT, int rA)
+{
+ /* Use a temporary, just in case rT == rA */
+ int tmp_reg = spe_allocate_available_register(p);
+ /* Duplicate bytes 0, 1, 2, and 3 across the whole register */
+ spe_ila(p, tmp_reg, 0x00010203);
+ spe_shufb(p, rT, rA, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
+}
+
+
+void
+spe_complement(struct spe_function *p, int rT, int rA)
+{
+ spe_nor(p, rT, rA, rA);
}
void
-spe_move(struct spe_function *p, unsigned rT, unsigned rA)
+spe_move(struct spe_function *p, int rT, int rA)
{
- spe_ori(p, rT, rA, 0);
+ /* Use different instructions depending on the instruction address
+ * to take advantage of the dual pipelines.
+ */
+ if (p->num_inst & 1)
+ spe_shlqbyi(p, rT, rA, 0); /* odd pipe */
+ else
+ spe_ori(p, rT, rA, 0); /* even pipe */
}
void
-spe_zero(struct spe_function *p, unsigned rT)
+spe_zero(struct spe_function *p, int rT)
{
spe_xor(p, rT, rT, rT);
}
+void
+spe_splat_word(struct spe_function *p, int rT, int rA, int word)
+{
+ assert(word >= 0);
+ assert(word <= 3);
+
+ if (word == 0) {
+ int tmp1 = rT;
+ spe_ila(p, tmp1, 66051);
+ spe_shufb(p, rT, rA, rA, tmp1);
+ }
+ else {
+ /* XXX review this, we may not need the rotqbyi instruction */
+ int tmp1 = rT;
+ int tmp2 = spe_allocate_available_register(p);
+
+ spe_ila(p, tmp1, 66051);
+ spe_rotqbyi(p, tmp2, rA, 4 * word);
+ spe_shufb(p, rT, tmp2, tmp2, tmp1);
+
+ spe_release_register(p, tmp2);
+ }
+}
+
+/**
+ * For each 32-bit float element of rA and rB, choose the smaller of the
+ * two, compositing them into the rT register.
+ *
+ * The Float Compare Greater Than (fcgt) instruction will put 1s into
+ * compare_reg where rA > rB, and 0s where rA <= rB.
+ *
+ * Then the Select Bits (selb) instruction will take bits from rA where
+ * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA
+ * where rA <= rB and from rB where rB > rA, which is exactly the
+ * "min" operation.
+ *
+ * The compare_reg could in many cases be the same as rT, unless
+ * rT == rA || rt == rB. But since this is common in constructions
+ * like "x = min(x, a)", we always allocate a new register to be safe.
+ */
+void
+spe_float_min(struct spe_function *p, int rT, int rA, int rB)
+{
+ int compare_reg = spe_allocate_available_register(p);
+ spe_fcgt(p, compare_reg, rA, rB);
+ spe_selb(p, rT, rA, rB, compare_reg);
+ spe_release_register(p, compare_reg);
+}
+
+/**
+ * For each 32-bit float element of rA and rB, choose the greater of the
+ * two, compositing them into the rT register.
+ *
+ * The logic is similar to that of spe_float_min() above; the only
+ * difference is that the registers on spe_selb() have been reversed,
+ * so that the larger of the two is selected instead of the smaller.
+ */
+void
+spe_float_max(struct spe_function *p, int rT, int rA, int rB)
+{
+ int compare_reg = spe_allocate_available_register(p);
+ spe_fcgt(p, compare_reg, rA, rB);
+ spe_selb(p, rT, rB, rA, compare_reg);
+ spe_release_register(p, compare_reg);
+}
+
#endif /* GALLIUM_CELL */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index d95e5aace3..65d9c77415 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -28,6 +28,7 @@
* For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
*
* \author Ian Romanick <idr@us.ibm.com>
+ * \author Brian Paul
*/
#ifndef RTASM_PPC_SPE_H
@@ -39,10 +40,10 @@
/** number of general-purpose SIMD registers */
#define SPE_NUM_REGS 128
-/** Return Address register */
+/** Return Address register (aka $lr / Link Register) */
#define SPE_REG_RA 0
-/** Stack Pointer register */
+/** Stack Pointer register (aka $sp) */
#define SPE_REG_SP 1
@@ -52,308 +53,371 @@ struct spe_function
uint num_inst;
uint max_inst;
- /**
- * Mask of used / unused registers
- *
- * Each set bit corresponds to an available register. Each cleared bit
- * corresponds to an allocated register.
+ /**
+ * The "set count" reflects the number of nested register sets
+ * are allowed. In the unlikely case that we exceed the set count,
+ * register allocation will start to be confused, which is critical
+ * enough that we check for it.
+ */
+ unsigned char set_count;
+
+ /**
+ * Flags for used and unused registers. Each byte corresponds to a
+ * register; a 0 in that byte means that the register is available.
+ * A value of 1 means that the register was allocated in the current
+ * register set. Any other value N means that the register was allocated
+ * N register sets ago.
*
* \sa
* spe_allocate_register, spe_allocate_available_register,
- * spe_release_register
+ * spe_allocate_register_set, spe_release_register_set, spe_release_register,
*/
- uint64_t regs[SPE_NUM_REGS / 64];
+ unsigned char regs[SPE_NUM_REGS];
+
+ boolean print; /**< print/dump instructions as they're emitted? */
+ int indent; /**< number of spaces to indent */
};
-extern void spe_init_func(struct spe_function *p, unsigned code_size);
+
+extern void spe_init_func(struct spe_function *p, uint code_size);
extern void spe_release_func(struct spe_function *p);
+extern uint spe_code_size(const struct spe_function *p);
extern int spe_allocate_available_register(struct spe_function *p);
extern int spe_allocate_register(struct spe_function *p, int reg);
extern void spe_release_register(struct spe_function *p, int reg);
+extern void spe_allocate_register_set(struct spe_function *p);
+extern void spe_release_register_set(struct spe_function *p);
+
+extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]);
+
+extern void spe_print_code(struct spe_function *p, boolean enable);
+extern void spe_indent(struct spe_function *p, int spaces);
+extern void spe_comment(struct spe_function *p, int rel_indent, const char *s);
+
#endif /* RTASM_PPC_SPE_H */
-#ifndef EMIT_
-#define EMIT_(name, _op) \
- extern void _name (struct spe_function *p, unsigned rT)
+#ifndef EMIT
+#define EMIT(_name, _op) \
+ extern void _name (struct spe_function *p);
+#define EMIT_(_name, _op) \
+ extern void _name (struct spe_function *p, int rT);
#define EMIT_R(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA)
+ extern void _name (struct spe_function *p, int rT, int rA);
#define EMIT_RR(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- unsigned rB)
+ extern void _name (struct spe_function *p, int rT, int rA, int rB);
#define EMIT_RRR(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- unsigned rB, unsigned rC)
+ extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC);
#define EMIT_RI7(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI8(_name, _op, bias) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI10(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
- int imm)
+ extern void _name (struct spe_function *p, int rT, int rA, int imm);
+#define EMIT_RI10s(_name, _op) \
+ extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI16(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, int imm)
+ extern void _name (struct spe_function *p, int rT, int imm);
#define EMIT_RI18(_name, _op) \
- extern void _name (struct spe_function *p, unsigned rT, int imm)
+ extern void _name (struct spe_function *p, int rT, int imm);
#define EMIT_I16(_name, _op) \
- extern void _name (struct spe_function *p, int imm)
+ extern void _name (struct spe_function *p, int imm);
#define UNDEF_EMIT_MACROS
-#endif /* EMIT_ */
+#endif /* EMIT */
/* Memory load / store instructions
*/
-EMIT_RI10(spe_lqd, 0x034);
-EMIT_RR (spe_lqx, 0x1c4);
-EMIT_RI16(spe_lqa, 0x061);
-EMIT_RI16(spe_lqr, 0x067);
-EMIT_RI10(spe_stqd, 0x024);
-EMIT_RR (spe_stqx, 0x144);
-EMIT_RI16(spe_stqa, 0x041);
-EMIT_RI16(spe_stqr, 0x047);
-EMIT_RI7 (spe_cbd, 0x1f4);
-EMIT_RR (spe_cbx, 0x1d4);
-EMIT_RI7 (spe_chd, 0x1f5);
-EMIT_RI7 (spe_chx, 0x1d5);
-EMIT_RI7 (spe_cwd, 0x1f6);
-EMIT_RI7 (spe_cwx, 0x1d6);
-EMIT_RI7 (spe_cdd, 0x1f7);
-EMIT_RI7 (spe_cdx, 0x1d7);
+EMIT_RR (spe_lqx, 0x1c4)
+EMIT_RI16(spe_lqa, 0x061)
+EMIT_RI16(spe_lqr, 0x067)
+EMIT_RR (spe_stqx, 0x144)
+EMIT_RI16(spe_stqa, 0x041)
+EMIT_RI16(spe_stqr, 0x047)
+EMIT_RI7 (spe_cbd, 0x1f4)
+EMIT_RR (spe_cbx, 0x1d4)
+EMIT_RI7 (spe_chd, 0x1f5)
+EMIT_RI7 (spe_chx, 0x1d5)
+EMIT_RI7 (spe_cwd, 0x1f6)
+EMIT_RI7 (spe_cwx, 0x1d6)
+EMIT_RI7 (spe_cdd, 0x1f7)
+EMIT_RI7 (spe_cdx, 0x1d7)
/* Constant formation instructions
*/
-EMIT_RI16(spe_ilh, 0x083);
-EMIT_RI16(spe_ilhu, 0x082);
-EMIT_RI16(spe_il, 0x081);
-EMIT_RI18(spe_ila, 0x021);
-EMIT_RI16(spe_iohl, 0x0c1);
-EMIT_RI16(spe_fsmbi, 0x065);
+EMIT_RI16(spe_ilh, 0x083)
+EMIT_RI16(spe_ilhu, 0x082)
+EMIT_RI16(spe_il, 0x081)
+EMIT_RI18(spe_ila, 0x021)
+EMIT_RI16(spe_iohl, 0x0c1)
+EMIT_RI16(spe_fsmbi, 0x065)
/* Integer and logical instructions
*/
-EMIT_RR (spe_ah, 0x0c8);
-EMIT_RI10(spe_ahi, 0x01d);
-EMIT_RR (spe_a, 0x0c0);
-EMIT_RI10(spe_ai, 0x01c);
-EMIT_RR (spe_sfh, 0x048);
-EMIT_RI10(spe_sfhi, 0x00d);
-EMIT_RR (spe_sf, 0x040);
-EMIT_RI10(spe_sfi, 0x00c);
-EMIT_RR (spe_addx, 0x340);
-EMIT_RR (spe_cg, 0x0c2);
-EMIT_RR (spe_cgx, 0x342);
-EMIT_RR (spe_sfx, 0x341);
-EMIT_RR (spe_bg, 0x042);
-EMIT_RR (spe_bgx, 0x343);
-EMIT_RR (spe_mpy, 0x3c4);
-EMIT_RR (spe_mpyu, 0x3cc);
-EMIT_RI10(spe_mpyi, 0x074);
-EMIT_RI10(spe_mpyui, 0x075);
-EMIT_RRR (spe_mpya, 0x00c);
-EMIT_RR (spe_mpyh, 0x3c5);
-EMIT_RR (spe_mpys, 0x3c7);
-EMIT_RR (spe_mpyhh, 0x3c6);
-EMIT_RR (spe_mpyhha, 0x346);
-EMIT_RR (spe_mpyhhu, 0x3ce);
-EMIT_RR (spe_mpyhhau, 0x34e);
-EMIT_R (spe_clz, 0x2a5);
-EMIT_R (spe_cntb, 0x2b4);
-EMIT_R (spe_fsmb, 0x1b6);
-EMIT_R (spe_fsmh, 0x1b5);
-EMIT_R (spe_fsm, 0x1b4);
-EMIT_R (spe_gbb, 0x1b2);
-EMIT_R (spe_gbh, 0x1b1);
-EMIT_R (spe_gb, 0x1b0);
-EMIT_RR (spe_avgb, 0x0d3);
-EMIT_RR (spe_absdb, 0x053);
-EMIT_RR (spe_sumb, 0x253);
-EMIT_R (spe_xsbh, 0x2b6);
-EMIT_R (spe_xshw, 0x2ae);
-EMIT_R (spe_xswd, 0x2a6);
-EMIT_RR (spe_and, 0x0c1);
-EMIT_RR (spe_andc, 0x2c1);
-EMIT_RI10(spe_andbi, 0x016);
-EMIT_RI10(spe_andhi, 0x015);
-EMIT_RI10(spe_andi, 0x014);
-EMIT_RR (spe_or, 0x041);
-EMIT_RR (spe_orc, 0x2c9);
-EMIT_RI10(spe_orbi, 0x006);
-EMIT_RI10(spe_orhi, 0x005);
-EMIT_RI10(spe_ori, 0x004);
-EMIT_R (spe_orx, 0x1f0);
-EMIT_RR (spe_xor, 0x241);
-EMIT_RI10(spe_xorbi, 0x026);
-EMIT_RI10(spe_xorhi, 0x025);
-EMIT_RI10(spe_xori, 0x024);
-EMIT_RR (spe_nand, 0x0c9);
-EMIT_RR (spe_nor, 0x049);
-EMIT_RR (spe_eqv, 0x249);
-EMIT_RRR (spe_selb, 0x008);
-EMIT_RRR (spe_shufb, 0x00b);
+EMIT_RR (spe_ah, 0x0c8)
+EMIT_RI10(spe_ahi, 0x01d)
+EMIT_RR (spe_a, 0x0c0)
+EMIT_RI10s(spe_ai, 0x01c)
+EMIT_RR (spe_sfh, 0x048)
+EMIT_RI10(spe_sfhi, 0x00d)
+EMIT_RR (spe_sf, 0x040)
+EMIT_RI10(spe_sfi, 0x00c)
+EMIT_RR (spe_addx, 0x340)
+EMIT_RR (spe_cg, 0x0c2)
+EMIT_RR (spe_cgx, 0x342)
+EMIT_RR (spe_sfx, 0x341)
+EMIT_RR (spe_bg, 0x042)
+EMIT_RR (spe_bgx, 0x343)
+EMIT_RR (spe_mpy, 0x3c4)
+EMIT_RR (spe_mpyu, 0x3cc)
+EMIT_RI10(spe_mpyi, 0x074)
+EMIT_RI10(spe_mpyui, 0x075)
+EMIT_RRR (spe_mpya, 0x00c)
+EMIT_RR (spe_mpyh, 0x3c5)
+EMIT_RR (spe_mpys, 0x3c7)
+EMIT_RR (spe_mpyhh, 0x3c6)
+EMIT_RR (spe_mpyhha, 0x346)
+EMIT_RR (spe_mpyhhu, 0x3ce)
+EMIT_RR (spe_mpyhhau, 0x34e)
+EMIT_R (spe_clz, 0x2a5)
+EMIT_R (spe_cntb, 0x2b4)
+EMIT_R (spe_fsmb, 0x1b6)
+EMIT_R (spe_fsmh, 0x1b5)
+EMIT_R (spe_fsm, 0x1b4)
+EMIT_R (spe_gbb, 0x1b2)
+EMIT_R (spe_gbh, 0x1b1)
+EMIT_R (spe_gb, 0x1b0)
+EMIT_RR (spe_avgb, 0x0d3)
+EMIT_RR (spe_absdb, 0x053)
+EMIT_RR (spe_sumb, 0x253)
+EMIT_R (spe_xsbh, 0x2b6)
+EMIT_R (spe_xshw, 0x2ae)
+EMIT_R (spe_xswd, 0x2a6)
+EMIT_RR (spe_and, 0x0c1)
+EMIT_RR (spe_andc, 0x2c1)
+EMIT_RI10s(spe_andbi, 0x016)
+EMIT_RI10s(spe_andhi, 0x015)
+EMIT_RI10s(spe_andi, 0x014)
+EMIT_RR (spe_or, 0x041)
+EMIT_RR (spe_orc, 0x2c9)
+EMIT_RI10s(spe_orbi, 0x006)
+EMIT_RI10s(spe_orhi, 0x005)
+EMIT_RI10s(spe_ori, 0x004)
+EMIT_R (spe_orx, 0x1f0)
+EMIT_RR (spe_xor, 0x241)
+EMIT_RI10s(spe_xorbi, 0x046)
+EMIT_RI10s(spe_xorhi, 0x045)
+EMIT_RI10s(spe_xori, 0x044)
+EMIT_RR (spe_nand, 0x0c9)
+EMIT_RR (spe_nor, 0x049)
+EMIT_RR (spe_eqv, 0x249)
+EMIT_RRR (spe_selb, 0x008)
+EMIT_RRR (spe_shufb, 0x00b)
/* Shift and rotate instructions
*/
-EMIT_RR (spe_shlh, 0x05f);
-EMIT_RI7 (spe_shlhi, 0x07f);
-EMIT_RR (spe_shl, 0x05b);
-EMIT_RI7 (spe_shli, 0x07b);
-EMIT_RR (spe_shlqbi, 0x1db);
-EMIT_RI7 (spe_shlqbii, 0x1fb);
-EMIT_RR (spe_shlqby, 0x1df);
-EMIT_RI7 (spe_shlqbyi, 0x1ff);
-EMIT_RR (spe_shlqbybi, 0x1cf);
-EMIT_RR (spe_roth, 0x05c);
-EMIT_RI7 (spe_rothi, 0x07c);
-EMIT_RR (spe_rot, 0x058);
-EMIT_RI7 (spe_roti, 0x078);
-EMIT_RR (spe_rotqby, 0x1dc);
-EMIT_RI7 (spe_rotqbyi, 0x1fc);
-EMIT_RR (spe_rotqbybi, 0x1cc);
-EMIT_RR (spe_rotqbi, 0x1d8);
-EMIT_RI7 (spe_rotqbii, 0x1f8);
-EMIT_RR (spe_rothm, 0x05d);
-EMIT_RI7 (spe_rothmi, 0x07d);
-EMIT_RR (spe_rotm, 0x059);
-EMIT_RI7 (spe_rotmi, 0x079);
-EMIT_RR (spe_rotqmby, 0x1dd);
-EMIT_RI7 (spe_rotqmbyi, 0x1fd);
-EMIT_RR (spe_rotqmbybi, 0x1cd);
-EMIT_RR (spe_rotqmbi, 0x1c9);
-EMIT_RI7 (spe_rotqmbii, 0x1f9);
-EMIT_RR (spe_rotmah, 0x05e);
-EMIT_RI7 (spe_rotmahi, 0x07e);
-EMIT_RR (spe_rotma, 0x05a);
-EMIT_RI7 (spe_rotmai, 0x07a);
+EMIT_RR (spe_shlh, 0x05f)
+EMIT_RI7 (spe_shlhi, 0x07f)
+EMIT_RR (spe_shl, 0x05b)
+EMIT_RI7 (spe_shli, 0x07b)
+EMIT_RR (spe_shlqbi, 0x1db)
+EMIT_RI7 (spe_shlqbii, 0x1fb)
+EMIT_RR (spe_shlqby, 0x1df)
+EMIT_RI7 (spe_shlqbyi, 0x1ff)
+EMIT_RR (spe_shlqbybi, 0x1cf)
+EMIT_RR (spe_roth, 0x05c)
+EMIT_RI7 (spe_rothi, 0x07c)
+EMIT_RR (spe_rot, 0x058)
+EMIT_RI7 (spe_roti, 0x078)
+EMIT_RR (spe_rotqby, 0x1dc)
+EMIT_RI7 (spe_rotqbyi, 0x1fc)
+EMIT_RR (spe_rotqbybi, 0x1cc)
+EMIT_RR (spe_rotqbi, 0x1d8)
+EMIT_RI7 (spe_rotqbii, 0x1f8)
+EMIT_RR (spe_rothm, 0x05d)
+EMIT_RI7 (spe_rothmi, 0x07d)
+EMIT_RR (spe_rotm, 0x059)
+EMIT_RI7 (spe_rotmi, 0x079)
+EMIT_RR (spe_rotqmby, 0x1dd)
+EMIT_RI7 (spe_rotqmbyi, 0x1fd)
+EMIT_RR (spe_rotqmbybi, 0x1cd)
+EMIT_RR (spe_rotqmbi, 0x1c9)
+EMIT_RI7 (spe_rotqmbii, 0x1f9)
+EMIT_RR (spe_rotmah, 0x05e)
+EMIT_RI7 (spe_rotmahi, 0x07e)
+EMIT_RR (spe_rotma, 0x05a)
+EMIT_RI7 (spe_rotmai, 0x07a)
/* Compare, branch, and halt instructions
*/
-EMIT_RR (spe_heq, 0x3d8);
-EMIT_RI10(spe_heqi, 0x07f);
-EMIT_RR (spe_hgt, 0x258);
-EMIT_RI10(spe_hgti, 0x04f);
-EMIT_RR (spe_hlgt, 0x2d8);
-EMIT_RI10(spe_hlgti, 0x05f);
-EMIT_RR (spe_ceqb, 0x3d0);
-EMIT_RI10(spe_ceqbi, 0x07e);
-EMIT_RR (spe_ceqh, 0x3c8);
-EMIT_RI10(spe_ceqhi, 0x07d);
-EMIT_RR (spe_ceq, 0x3c0);
-EMIT_RI10(spe_ceqi, 0x07c);
-EMIT_RR (spe_cgtb, 0x250);
-EMIT_RI10(spe_cgtbi, 0x04e);
-EMIT_RR (spe_cgth, 0x248);
-EMIT_RI10(spe_cgthi, 0x04d);
-EMIT_RR (spe_cgt, 0x240);
-EMIT_RI10(spe_cgti, 0x04c);
-EMIT_RR (spe_clgtb, 0x2d0);
-EMIT_RI10(spe_clgtbi, 0x05e);
-EMIT_RR (spe_clgth, 0x2c8);
-EMIT_RI10(spe_clgthi, 0x05d);
-EMIT_RR (spe_clgt, 0x2c0);
-EMIT_RI10(spe_clgti, 0x05c);
-EMIT_I16 (spe_br, 0x064);
-EMIT_I16 (spe_bra, 0x060);
-EMIT_RI16(spe_brsl, 0x066);
-EMIT_RI16(spe_brasl, 0x062);
-EMIT_RI16(spe_brnz, 0x042);
-EMIT_RI16(spe_brz, 0x040);
-EMIT_RI16(spe_brhnz, 0x046);
-EMIT_RI16(spe_brhz, 0x044);
-
-extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e);
-extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e);
-extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA,
+EMIT_RR (spe_heq, 0x3d8)
+EMIT_RI10(spe_heqi, 0x07f)
+EMIT_RR (spe_hgt, 0x258)
+EMIT_RI10(spe_hgti, 0x04f)
+EMIT_RR (spe_hlgt, 0x2d8)
+EMIT_RI10(spe_hlgti, 0x05f)
+EMIT_RR (spe_ceqb, 0x3d0)
+EMIT_RI10(spe_ceqbi, 0x07e)
+EMIT_RR (spe_ceqh, 0x3c8)
+EMIT_RI10(spe_ceqhi, 0x07d)
+EMIT_RR (spe_ceq, 0x3c0)
+EMIT_RI10(spe_ceqi, 0x07c)
+EMIT_RR (spe_cgtb, 0x250)
+EMIT_RI10(spe_cgtbi, 0x04e)
+EMIT_RR (spe_cgth, 0x248)
+EMIT_RI10(spe_cgthi, 0x04d)
+EMIT_RR (spe_cgt, 0x240)
+EMIT_RI10(spe_cgti, 0x04c)
+EMIT_RR (spe_clgtb, 0x2d0)
+EMIT_RI10(spe_clgtbi, 0x05e)
+EMIT_RR (spe_clgth, 0x2c8)
+EMIT_RI10(spe_clgthi, 0x05d)
+EMIT_RR (spe_clgt, 0x2c0)
+EMIT_RI10(spe_clgti, 0x05c)
+EMIT_I16 (spe_br, 0x064)
+EMIT_I16 (spe_bra, 0x060)
+EMIT_RI16(spe_brsl, 0x066)
+EMIT_RI16(spe_brasl, 0x062)
+EMIT_RI16(spe_brnz, 0x042)
+EMIT_RI16(spe_brz, 0x040)
+EMIT_RI16(spe_brhnz, 0x046)
+EMIT_RI16(spe_brhz, 0x044)
+
+/* Control instructions
+ */
+EMIT (spe_lnop, 0x001)
+
+extern void
+spe_lqd(struct spe_function *p, int rT, int rA, int offset);
+
+extern void
+spe_stqd(struct spe_function *p, int rT, int rA, int offset);
+
+extern void spe_bi(struct spe_function *p, int rA, int d, int e);
+extern void spe_iret(struct spe_function *p, int rA, int d, int e);
+extern void spe_bisled(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_bisl(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_biz(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_binz(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_bihz(struct spe_function *p, int rT, int rA,
int d, int e);
-extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA,
+extern void spe_bihnz(struct spe_function *p, int rT, int rA,
int d, int e);
/** Load/splat immediate float into rT. */
extern void
-spe_load_float(struct spe_function *p, unsigned rT, float x);
+spe_load_float(struct spe_function *p, int rT, float x);
/** Load/splat immediate int into rT. */
extern void
-spe_load_int(struct spe_function *p, unsigned rT, int i);
+spe_load_int(struct spe_function *p, int rT, int i);
+
+/** Load/splat immediate unsigned int into rT. */
+extern void
+spe_load_uint(struct spe_function *p, int rT, uint ui);
+
+/** And immediate value into rT. */
+extern void
+spe_and_uint(struct spe_function *p, int rT, int rA, uint ui);
+
+/** Xor immediate value into rT. */
+extern void
+spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui);
+
+/** Compare equal with immediate value. */
+extern void
+spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui);
+
+/** Compare greater with immediate value. */
+extern void
+spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui);
/** Replicate word 0 of rA across rT. */
extern void
-spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
+spe_splat(struct spe_function *p, int rT, int rA);
-/** Complement/invert all bits in rT. */
+/** rT = complement_all_bits(rA). */
extern void
-spe_complement(struct spe_function *p, unsigned rT);
+spe_complement(struct spe_function *p, int rT, int rA);
/** rT = rA. */
extern void
-spe_move(struct spe_function *p, unsigned rT, unsigned rA);
+spe_move(struct spe_function *p, int rT, int rA);
/** rT = {0,0,0,0}. */
extern void
-spe_zero(struct spe_function *p, unsigned rT);
+spe_zero(struct spe_function *p, int rT);
+
+/** rT = splat(rA, word) */
+extern void
+spe_splat_word(struct spe_function *p, int rT, int rA, int word);
+
+/** rT = float min(rA, rB) */
+extern void
+spe_float_min(struct spe_function *p, int rT, int rA, int rB);
+
+/** rT = float max(rA, rB) */
+extern void
+spe_float_max(struct spe_function *p, int rT, int rA, int rB);
/* Floating-point instructions
*/
-EMIT_RR (spe_fa, 0x2c4);
-EMIT_RR (spe_dfa, 0x2cc);
-EMIT_RR (spe_fs, 0x2c5);
-EMIT_RR (spe_dfs, 0x2cd);
-EMIT_RR (spe_fm, 0x2c6);
-EMIT_RR (spe_dfm, 0x2ce);
-EMIT_RRR (spe_fma, 0x00e);
-EMIT_RR (spe_dfma, 0x35c);
-EMIT_RRR (spe_fnms, 0x00d);
-EMIT_RR (spe_dfnms, 0x35e);
-EMIT_RRR (spe_fms, 0x00f);
-EMIT_RR (spe_dfms, 0x35d);
-EMIT_RR (spe_dfnma, 0x35f);
-EMIT_R (spe_frest, 0x1b8);
-EMIT_R (spe_frsqest, 0x1b9);
-EMIT_RR (spe_fi, 0x3d4);
-EMIT_RI8 (spe_csflt, 0x1da, 155);
-EMIT_RI8 (spe_cflts, 0x1d8, 173);
-EMIT_RI8 (spe_cuflt, 0x1db, 155);
-EMIT_RI8 (spe_cfltu, 0x1d9, 173);
-EMIT_R (spe_frds, 0x3b9);
-EMIT_R (spe_fesd, 0x3b8);
-EMIT_RR (spe_dfceq, 0x3c3);
-EMIT_RR (spe_dfcmeq, 0x3cb);
-EMIT_RR (spe_dfcgt, 0x2c3);
-EMIT_RR (spe_dfcmgt, 0x2cb);
-EMIT_RI7 (spe_dftsv, 0x3bf);
-EMIT_RR (spe_fceq, 0x3c2);
-EMIT_RR (spe_fcmeq, 0x3ca);
-EMIT_RR (spe_fcgt, 0x2c2);
-EMIT_RR (spe_fcmgt, 0x2ca);
-EMIT_R (spe_fscrwr, 0x3ba);
-EMIT_ (spe_fscrrd, 0x398);
+EMIT_RR (spe_fa, 0x2c4)
+EMIT_RR (spe_dfa, 0x2cc)
+EMIT_RR (spe_fs, 0x2c5)
+EMIT_RR (spe_dfs, 0x2cd)
+EMIT_RR (spe_fm, 0x2c6)
+EMIT_RR (spe_dfm, 0x2ce)
+EMIT_RRR (spe_fma, 0x00e)
+EMIT_RR (spe_dfma, 0x35c)
+EMIT_RRR (spe_fnms, 0x00d)
+EMIT_RR (spe_dfnms, 0x35e)
+EMIT_RRR (spe_fms, 0x00f)
+EMIT_RR (spe_dfms, 0x35d)
+EMIT_RR (spe_dfnma, 0x35f)
+EMIT_R (spe_frest, 0x1b8)
+EMIT_R (spe_frsqest, 0x1b9)
+EMIT_RR (spe_fi, 0x3d4)
+EMIT_RI8 (spe_csflt, 0x1da, 155)
+EMIT_RI8 (spe_cflts, 0x1d8, 173)
+EMIT_RI8 (spe_cuflt, 0x1db, 155)
+EMIT_RI8 (spe_cfltu, 0x1d9, 173)
+EMIT_R (spe_frds, 0x3b9)
+EMIT_R (spe_fesd, 0x3b8)
+EMIT_RR (spe_dfceq, 0x3c3)
+EMIT_RR (spe_dfcmeq, 0x3cb)
+EMIT_RR (spe_dfcgt, 0x2c3)
+EMIT_RR (spe_dfcmgt, 0x2cb)
+EMIT_RI7 (spe_dftsv, 0x3bf)
+EMIT_RR (spe_fceq, 0x3c2)
+EMIT_RR (spe_fcmeq, 0x3ca)
+EMIT_RR (spe_fcgt, 0x2c2)
+EMIT_RR (spe_fcmgt, 0x2ca)
+EMIT_R (spe_fscrwr, 0x3ba)
+EMIT_ (spe_fscrrd, 0x398)
/* Channel instructions
*/
-EMIT_R (spe_rdch, 0x00d);
-EMIT_R (spe_rdchcnt, 0x00f);
-EMIT_R (spe_wrch, 0x10d);
+EMIT_R (spe_rdch, 0x00d)
+EMIT_R (spe_rdchcnt, 0x00f)
+EMIT_R (spe_wrch, 0x10d)
#ifdef UNDEF_EMIT_MACROS
+#undef EMIT
#undef EMIT_
#undef EMIT_R
#undef EMIT_RR
@@ -361,6 +425,7 @@ EMIT_R (spe_wrch, 0x10d);
#undef EMIT_RI7
#undef EMIT_RI8
#undef EMIT_RI10
+#undef EMIT_RI10s
#undef EMIT_RI16
#undef EMIT_RI18
#undef EMIT_I16
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index ad9d8f8ced..57fcf6de2a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -26,7 +26,7 @@
#if defined(PIPE_ARCH_X86)
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_pointer.h"
#include "rtasm_execmem.h"
@@ -240,7 +240,8 @@ static void emit_modrm( struct x86_function *p,
/* Oh-oh we've stumbled into the SIB thing.
*/
if (regmem.file == file_REG32 &&
- regmem.idx == reg_SP) {
+ regmem.idx == reg_SP &&
+ regmem.mod != mod_REG) {
emit_1ub(p, 0x24); /* simplistic! */
}
@@ -439,25 +440,70 @@ void x86_call( struct x86_function *p, struct x86_reg reg)
}
-/* michal:
- * Temporary. As I need immediate operands, and dont want to mess with the codegen,
- * I load the immediate into general purpose register and use it.
- */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
+ assert(dst.file == file_REG32);
assert(dst.mod == mod_REG);
emit_1ub(p, 0xb8 + dst.idx);
emit_1i(p, imm);
}
-void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm )
+/**
+ * Immediate group 1 instructions.
+ */
+static INLINE void
+x86_group1_imm( struct x86_function *p,
+ unsigned op, struct x86_reg dst, int imm )
{
- DUMP_RI( dst, imm );
+ assert(dst.file == file_REG32);
assert(dst.mod == mod_REG);
- emit_1ub(p, 0x80);
- emit_modrm_noreg(p, 0, dst);
- emit_1ub(p, imm);
+ if(-0x80 <= imm && imm < 0x80) {
+ emit_1ub(p, 0x83);
+ emit_modrm_noreg(p, op, dst);
+ emit_1b(p, (char)imm);
+ }
+ else {
+ emit_1ub(p, 0x81);
+ emit_modrm_noreg(p, op, dst);
+ emit_1i(p, imm);
+ }
+}
+
+void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 0, dst, imm);
+}
+
+void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 1, dst, imm);
+}
+
+void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 4, dst, imm);
+}
+
+void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 5, dst, imm);
+}
+
+void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 6, dst, imm);
+}
+
+void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+ DUMP_RI( dst, imm );
+ x86_group1_imm(p, 7, dst, imm);
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index af79f07dd3..1b5eaaca85 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -152,12 +152,13 @@ void x86_jmp( struct x86_function *p, int label );
/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call( struct x86_function *p, struct x86_reg reg);
-/* michal:
- * Temporary. As I need immediate operands, and dont want to mess with the codegen,
- * I load the immediate into general purpose register and use it.
- */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
-void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm );
+void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm );
/* Macro for sse_shufps() and sse2_pshufd():
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index c7155a9316..d7df9490cf 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -11,6 +11,7 @@ C_SOURCES = \
tgsi_info.c \
tgsi_iterate.c \
tgsi_parse.c \
+ tgsi_ppc.c \
tgsi_scan.c \
tgsi_sse2.c \
tgsi_text.c \
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 45bf3f6d57..8200cce42f 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -12,6 +12,7 @@ tgsi = env.ConvenienceLibrary(
'tgsi_parse.c',
'tgsi_sanity.c',
'tgsi_scan.c',
+ 'tgsi_ppc.c',
'tgsi_sse2.c',
'tgsi_text.c',
'tgsi_transform.c',
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index ed8fc5ac25..a1891a140a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_build.h"
#include "tgsi_parse.h"
@@ -114,7 +114,7 @@ tgsi_default_declaration( void )
struct tgsi_declaration declaration;
declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
- declaration.Size = 1;
+ declaration.NrTokens = 1;
declaration.File = TGSI_FILE_NULL;
declaration.UsageMask = TGSI_WRITEMASK_XYZW;
declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
@@ -160,9 +160,9 @@ declaration_grow(
struct tgsi_declaration *declaration,
struct tgsi_header *header )
{
- assert( declaration->Size < 0xFF );
+ assert( declaration->NrTokens < 0xFF );
- declaration->Size++;
+ declaration->NrTokens++;
header_bodysize_grow( header );
}
@@ -308,7 +308,7 @@ tgsi_default_immediate( void )
struct tgsi_immediate immediate;
immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
- immediate.Size = 1;
+ immediate.NrTokens = 1;
immediate.DataType = TGSI_IMM_FLOAT32;
immediate.Padding = 0;
immediate.Extended = 0;
@@ -345,9 +345,9 @@ immediate_grow(
struct tgsi_immediate *immediate,
struct tgsi_header *header )
{
- assert( immediate->Size < 0xFF );
+ assert( immediate->NrTokens < 0xFF );
- immediate->Size++;
+ immediate->NrTokens++;
header_bodysize_grow( header );
}
@@ -384,7 +384,7 @@ tgsi_build_full_immediate(
*immediate = tgsi_build_immediate( header );
- for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) {
+ for( i = 0; i < full_imm->Immediate.NrTokens - 1; i++ ) {
struct tgsi_immediate_float32 *if32;
if( maxsize <= size )
@@ -411,7 +411,7 @@ tgsi_default_instruction( void )
struct tgsi_instruction instruction;
instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
- instruction.Size = 1;
+ instruction.NrTokens = 1;
instruction.Opcode = TGSI_OPCODE_MOV;
instruction.Saturate = TGSI_SAT_NONE;
instruction.NumDstRegs = 1;
@@ -453,9 +453,9 @@ instruction_grow(
struct tgsi_instruction *instruction,
struct tgsi_header *header )
{
- assert (instruction->Size < 0xFF);
+ assert (instruction->NrTokens < 0xFF);
- instruction->Size++;
+ instruction->NrTokens++;
header_bodysize_grow( header );
}
@@ -801,10 +801,14 @@ tgsi_default_instruction_ext_nv( void )
return instruction_ext_nv;
}
-union token_u32
+
+/** test for inequality of 32-bit values pointed to by a and b */
+static INLINE boolean
+compare32(const void *a, const void *b)
{
- unsigned u32;
-};
+ return *((uint32_t *) a) != *((uint32_t *) b);
+}
+
unsigned
tgsi_compare_instruction_ext_nv(
@@ -813,7 +817,7 @@ tgsi_compare_instruction_ext_nv(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_instruction_ext_nv
@@ -872,7 +876,7 @@ tgsi_compare_instruction_ext_label(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_instruction_ext_label
@@ -913,7 +917,7 @@ tgsi_compare_instruction_ext_texture(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_instruction_ext_texture
@@ -1035,7 +1039,7 @@ tgsi_compare_src_register_ext_swz(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_src_register_ext_swz
@@ -1103,7 +1107,7 @@ tgsi_compare_src_register_ext_mod(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_src_register_ext_mod
@@ -1249,7 +1253,7 @@ tgsi_compare_dst_register_ext_concode(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_dst_register_ext_concode
@@ -1307,7 +1311,7 @@ tgsi_compare_dst_register_ext_modulate(
{
a.Padding = b.Padding = 0;
a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+ return compare32(&a, &b);
}
struct tgsi_dst_register_ext_modulate
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 2ed8c2bf07..d57cb9139f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_string.h"
#include "tgsi_dump.h"
#include "tgsi_info.h"
@@ -285,7 +285,7 @@ iter_immediate(
ENM( imm->Immediate.DataType, immediate_type_names );
TXT( " { " );
- for (i = 0; i < imm->Immediate.Size - 1; i++) {
+ for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
FLT( imm->u.ImmediateFloat32[i].Float );
@@ -294,7 +294,7 @@ iter_immediate(
assert( 0 );
}
- if (i < imm->Immediate.Size - 2)
+ if (i < imm->Immediate.NrTokens - 2)
TXT( ", " );
}
TXT( " }" );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index be25cb45a0..3dc61c48ca 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_string.h"
#include "tgsi_dump_c.h"
#include "tgsi_build.h"
@@ -283,7 +283,7 @@ dump_immediate_verbose(
UIX( imm->Immediate.Padding );
}
- for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+ for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) {
EOL();
switch( imm->Immediate.DataType ) {
case TGSI_IMM_FLOAT32:
@@ -646,7 +646,6 @@ tgsi_dump_c(
struct tgsi_full_declaration fd;
uint ignored = flags & TGSI_DUMP_C_IGNORED;
uint deflt = flags & TGSI_DUMP_C_DEFAULT;
- uint instno = 0;
tgsi_parse_init( &parse, tokens );
@@ -676,7 +675,7 @@ tgsi_dump_c(
ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
if( ignored ) {
TXT( "\nSize : " );
- UID( parse.FullToken.Token.Size );
+ UID( parse.FullToken.Token.NrTokens );
if( deflt || parse.FullToken.Token.Extended ) {
TXT( "\nExtended : " );
UID( parse.FullToken.Token.Extended );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index f98b66dc0b..5c5d8d2550 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -133,7 +133,7 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
uint numSamplers,
- struct tgsi_sampler *samplers)
+ struct tgsi_sampler **samplers)
{
uint k;
struct tgsi_parse_context parse;
@@ -202,7 +202,7 @@ tgsi_exec_machine_bind_shader(
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
- uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+ uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
assert( size % 4 == 0 );
assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
@@ -320,6 +320,7 @@ micro_add(
dst->f[3] = src0->f[3] + src1->f[3];
}
+#if 0
static void
micro_iadd(
union tgsi_exec_channel *dst,
@@ -331,6 +332,7 @@ micro_iadd(
dst->i[2] = src0->i[2] + src1->i[2];
dst->i[3] = src0->i[3] + src1->i[3];
}
+#endif
static void
micro_and(
@@ -408,6 +410,7 @@ micro_div(
}
}
+#if 0
static void
micro_udiv(
union tgsi_exec_channel *dst,
@@ -419,6 +422,7 @@ micro_udiv(
dst->u[2] = src0->u[2] / src1->u[2];
dst->u[3] = src0->u[3] / src1->u[3];
}
+#endif
static void
micro_eq(
@@ -434,6 +438,7 @@ micro_eq(
dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
}
+#if 0
static void
micro_ieq(
union tgsi_exec_channel *dst,
@@ -447,6 +452,7 @@ micro_ieq(
dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
}
+#endif
static void
micro_exp2(
@@ -466,6 +472,7 @@ micro_exp2(
#endif
}
+#if 0
static void
micro_f2ut(
union tgsi_exec_channel *dst,
@@ -476,6 +483,7 @@ micro_f2ut(
dst->u[2] = (uint) src->f[2];
dst->u[3] = (uint) src->f[3];
}
+#endif
static void
micro_flr(
@@ -570,6 +578,7 @@ micro_lt(
dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
}
+#if 0
static void
micro_ilt(
union tgsi_exec_channel *dst,
@@ -583,7 +592,9 @@ micro_ilt(
dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
}
+#endif
+#if 0
static void
micro_ult(
union tgsi_exec_channel *dst,
@@ -597,6 +608,7 @@ micro_ult(
dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
}
+#endif
static void
micro_max(
@@ -610,6 +622,7 @@ micro_max(
dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
}
+#if 0
static void
micro_imax(
union tgsi_exec_channel *dst,
@@ -621,7 +634,9 @@ micro_imax(
dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
}
+#endif
+#if 0
static void
micro_umax(
union tgsi_exec_channel *dst,
@@ -633,6 +648,7 @@ micro_umax(
dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
}
+#endif
static void
micro_min(
@@ -646,6 +662,7 @@ micro_min(
dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
}
+#if 0
static void
micro_imin(
union tgsi_exec_channel *dst,
@@ -657,7 +674,9 @@ micro_imin(
dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
}
+#endif
+#if 0
static void
micro_umin(
union tgsi_exec_channel *dst,
@@ -669,7 +688,9 @@ micro_umin(
dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
}
+#endif
+#if 0
static void
micro_umod(
union tgsi_exec_channel *dst,
@@ -681,6 +702,7 @@ micro_umod(
dst->u[2] = src0->u[2] % src1->u[2];
dst->u[3] = src0->u[3] % src1->u[3];
}
+#endif
static void
micro_mul(
@@ -694,6 +716,7 @@ micro_mul(
dst->f[3] = src0->f[3] * src1->f[3];
}
+#if 0
static void
micro_imul(
union tgsi_exec_channel *dst,
@@ -705,7 +728,9 @@ micro_imul(
dst->i[2] = src0->i[2] * src1->i[2];
dst->i[3] = src0->i[3] * src1->i[3];
}
+#endif
+#if 0
static void
micro_imul64(
union tgsi_exec_channel *dst0,
@@ -722,7 +747,9 @@ micro_imul64(
dst0->i[2] = 0;
dst0->i[3] = 0;
}
+#endif
+#if 0
static void
micro_umul64(
union tgsi_exec_channel *dst0,
@@ -739,7 +766,10 @@ micro_umul64(
dst0->u[2] = 0;
dst0->u[3] = 0;
}
+#endif
+
+#if 0
static void
micro_movc(
union tgsi_exec_channel *dst,
@@ -752,6 +782,7 @@ micro_movc(
dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
}
+#endif
static void
micro_neg(
@@ -764,6 +795,7 @@ micro_neg(
dst->f[3] = -src->f[3];
}
+#if 0
static void
micro_ineg(
union tgsi_exec_channel *dst,
@@ -774,6 +806,7 @@ micro_ineg(
dst->i[2] = -src->i[2];
dst->i[3] = -src->i[3];
}
+#endif
static void
micro_not(
@@ -874,6 +907,7 @@ micro_trunc(
dst->f[3] = (float) (int) src0->f[3];
}
+#if 0
static void
micro_ushr(
union tgsi_exec_channel *dst,
@@ -885,6 +919,7 @@ micro_ushr(
dst->u[2] = src0->u[2] >> src1->u[2];
dst->u[3] = src0->u[3] >> src1->u[3];
}
+#endif
static void
micro_sin(
@@ -919,6 +954,7 @@ micro_sub(
dst->f[3] = src0->f[3] - src1->f[3];
}
+#if 0
static void
micro_u2f(
union tgsi_exec_channel *dst,
@@ -929,6 +965,7 @@ micro_u2f(
dst->f[2] = (float) src->u[2];
dst->f[3] = (float) src->u[3];
}
+#endif
static void
micro_xor(
@@ -1045,11 +1082,28 @@ fetch_source(
union tgsi_exec_channel index;
uint swizzle;
+ /* We start with a direct index into a register file.
+ *
+ * file[1],
+ * where:
+ * file = SrcRegister.File
+ * [1] = SrcRegister.Index
+ */
index.i[0] =
index.i[1] =
index.i[2] =
index.i[3] = reg->SrcRegister.Index;
+ /* There is an extra source register that indirectly subscripts
+ * a register file. The direct index now becomes an offset
+ * that is being added to the indirect register.
+ *
+ * file[ind[2].x+1],
+ * where:
+ * ind = SrcRegisterInd.File
+ * [2] = SrcRegisterInd.Index
+ * .x = SrcRegisterInd.SwizzleX
+ */
if (reg->SrcRegister.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1086,19 +1140,31 @@ fetch_source(
}
}
- if( reg->SrcRegister.Dimension ) {
- switch( reg->SrcRegister.File ) {
+ /* There is an extra source register that is a second
+ * subscript to a register file. Effectively it means that
+ * the register file is actually a 2D array of registers.
+ *
+ * file[1][3] == file[1*sizeof(file[1])+3],
+ * where:
+ * [3] = SrcRegisterDim.Index
+ */
+ if (reg->SrcRegister.Dimension) {
+ /* The size of the first-order array depends on the register file type.
+ * We need to multiply the index to the first array to get an effective,
+ * "flat" index that points to the beginning of the second-order array.
+ */
+ switch (reg->SrcRegister.File) {
case TGSI_FILE_INPUT:
- index.i[0] *= 17;
- index.i[1] *= 17;
- index.i[2] *= 17;
- index.i[3] *= 17;
+ index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
break;
case TGSI_FILE_CONSTANT:
- index.i[0] *= 4096;
- index.i[1] *= 4096;
- index.i[2] *= 4096;
- index.i[3] *= 4096;
+ index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
break;
default:
assert( 0 );
@@ -1109,6 +1175,17 @@ fetch_source(
index.i[2] += reg->SrcRegisterDim.Index;
index.i[3] += reg->SrcRegisterDim.Index;
+ /* Again, the second subscript index can be addressed indirectly
+ * identically to the first one.
+ * Nothing stops us from indirectly addressing the indirect register,
+ * but there is no need for that, so we won't exercise it.
+ *
+ * file[1][ind[4].y+3],
+ * where:
+ * ind = SrcRegisterDimInd.File
+ * [4] = SrcRegisterDimInd.Index
+ * .y = SrcRegisterDimInd.SwizzleX
+ */
if (reg->SrcRegisterDim.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1141,6 +1218,11 @@ fetch_source(
index.i[i] = 0;
}
}
+
+ /* If by any chance there was a need for a 3D array of register
+ * files, we would have to check whether SrcRegisterDim is followed
+ * by a dimension register and continue the saga.
+ */
}
swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
@@ -1490,7 +1572,7 @@ exec_kilp(struct tgsi_exec_machine *mach,
/*
- * Fetch a texel using STR texture coordinates.
+ * Fetch a four texture samples using STR texture coordinates.
*/
static void
fetch_texel( struct tgsi_sampler *sampler,
@@ -1524,7 +1606,7 @@ exec_tex(struct tgsi_exec_machine *mach,
boolean projected)
{
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
- union tgsi_exec_channel r[8];
+ union tgsi_exec_channel r[4];
uint chan_index;
float lodBias;
@@ -1547,7 +1629,7 @@ exec_tex(struct tgsi_exec_machine *mach,
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
@@ -1573,7 +1655,7 @@ exec_tex(struct tgsi_exec_machine *mach,
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], &r[1], &r[2], lodBias, /* inputs */
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
@@ -1599,7 +1681,7 @@ exec_tex(struct tgsi_exec_machine *mach,
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], &r[1], &r[2], lodBias,
&r[0], &r[1], &r[2], &r[3]);
break;
@@ -1709,6 +1791,7 @@ exec_declaration(
break;
default:
+ eval = NULL;
assert( 0 );
}
@@ -1751,7 +1834,7 @@ exec_instruction(
case TGSI_OPCODE_ARL:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_trunc( &r[0], &r[0] );
+ micro_flr( &r[0], &r[0] );
STORE( &r[0], 0, chan_index );
}
break;
@@ -1806,6 +1889,7 @@ exec_instruction(
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( &r[0], 0, CHAN_X );
+ micro_abs( &r[0], &r[0] );
micro_sqrt( &r[0], &r[0] );
micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index fc40a25e09..4ffd4efbff 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -68,17 +68,12 @@ struct tgsi_interp_coef
float dady[NUM_CHANNELS];
};
-
-struct softpipe_tile_cache; /**< Opaque to TGSI */
-
/**
* Information for sampling textures, which must be implemented
* by code outside the TGSI executor.
*/
struct tgsi_sampler
{
- const struct pipe_sampler_state *state;
- struct pipe_texture *texture;
/** Get samples for four fragments in a quad */
void (*get_samples)(struct tgsi_sampler *sampler,
const float s[QUAD_SIZE],
@@ -86,8 +81,6 @@ struct tgsi_sampler
const float p[QUAD_SIZE],
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE]);
- void *pipe; /*XXX temporary*/
- struct softpipe_tile_cache *cache;
};
/**
@@ -178,6 +171,16 @@ struct tgsi_exec_labels
#define TGSI_EXEC_MAX_LOOP_NESTING 20
#define TGSI_EXEC_MAX_CALL_NESTING 20
+/* The maximum number of input attributes per vertex. For 2D
+ * input register files, this is the stride between two 1D
+ * arrays.
+ */
+#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17
+
+/* The maximum number of constant vectors per constant buffer.
+ */
+#define TGSI_EXEC_MAX_CONST_BUFFER 4096
+
/**
* Run-time virtual machine state for executing TGSI shader.
*/
@@ -195,7 +198,7 @@ struct tgsi_exec_machine
struct tgsi_exec_vector *Temps;
struct tgsi_exec_vector *Addrs;
- struct tgsi_sampler *Samplers;
+ struct tgsi_sampler **Samplers;
float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
unsigned ImmLimit;
@@ -258,7 +261,7 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
uint numSamplers,
- struct tgsi_sampler *samplers);
+ struct tgsi_sampler **samplers);
uint
tgsi_exec_machine_run(
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 68c7a6b7f5..2b8a6f0fb1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_info.h"
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
index 5371a88b96..d88c2558d8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_iterate.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_iterate.h"
boolean
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 3757486ba9..22006edf3d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_build.h"
@@ -88,16 +88,33 @@ tgsi_parse_end_of_tokens(
1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize;
}
+
+/**
+ * This function is used to avoid and work-around type punning/aliasing
+ * warnings. The warnings seem harmless on x86 but on PPC they cause
+ * real failures.
+ */
+static INLINE void
+copy_token(void *dst, const void *src)
+{
+ memcpy(dst, src, 4);
+}
+
+
+/**
+ * Get next 4-byte token, return it at address specified by 'token'
+ */
static void
next_token(
struct tgsi_parse_context *ctx,
void *token )
{
assert( !tgsi_parse_end_of_tokens( ctx ) );
-
- *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++];
+ copy_token(token, &ctx->Tokens[ctx->Position]);
+ ctx->Position++;
}
+
void
tgsi_parse_token(
struct tgsi_parse_context *ctx )
@@ -116,7 +133,7 @@ tgsi_parse_token(
struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration;
*decl = tgsi_default_full_declaration();
- decl->Declaration = *(struct tgsi_declaration *) &token;
+ copy_token(&decl->Declaration, &token);
next_token( ctx, &decl->DeclarationRange );
@@ -132,15 +149,14 @@ tgsi_parse_token(
struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
*imm = tgsi_default_full_immediate();
- imm->Immediate = *(struct tgsi_immediate *) &token;
-
+ copy_token(&imm->Immediate, &token);
assert( !imm->Immediate.Extended );
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
imm->u.Pointer = MALLOC(
- sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
- for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+ sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.NrTokens - 1) );
+ for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) {
next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] );
}
break;
@@ -158,8 +174,7 @@ tgsi_parse_token(
unsigned extended;
*inst = tgsi_default_full_instruction();
- inst->Instruction = *(struct tgsi_instruction *) &token;
-
+ copy_token(&inst->Instruction, &token);
extended = inst->Instruction.Extended;
while( extended ) {
@@ -169,18 +184,15 @@ tgsi_parse_token(
switch( token.Type ) {
case TGSI_INSTRUCTION_EXT_TYPE_NV:
- inst->InstructionExtNv =
- *(struct tgsi_instruction_ext_nv *) &token;
+ copy_token(&inst->InstructionExtNv, &token);
break;
case TGSI_INSTRUCTION_EXT_TYPE_LABEL:
- inst->InstructionExtLabel =
- *(struct tgsi_instruction_ext_label *) &token;
+ copy_token(&inst->InstructionExtLabel, &token);
break;
case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE:
- inst->InstructionExtTexture =
- *(struct tgsi_instruction_ext_texture *) &token;
+ copy_token(&inst->InstructionExtTexture, &token);
break;
default:
@@ -212,13 +224,13 @@ tgsi_parse_token(
switch( token.Type ) {
case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE:
- inst->FullDstRegisters[i].DstRegisterExtConcode =
- *(struct tgsi_dst_register_ext_concode *) &token;
+ copy_token(&inst->FullDstRegisters[i].DstRegisterExtConcode,
+ &token);
break;
case TGSI_DST_REGISTER_EXT_TYPE_MODULATE:
- inst->FullDstRegisters[i].DstRegisterExtModulate =
- *(struct tgsi_dst_register_ext_modulate *) &token;
+ copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate,
+ &token);
break;
default:
@@ -245,13 +257,13 @@ tgsi_parse_token(
switch( token.Type ) {
case TGSI_SRC_REGISTER_EXT_TYPE_SWZ:
- inst->FullSrcRegisters[i].SrcRegisterExtSwz =
- *(struct tgsi_src_register_ext_swz *) &token;
+ copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz,
+ &token);
break;
case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
- inst->FullSrcRegisters[i].SrcRegisterExtMod =
- *(struct tgsi_src_register_ext_mod *) &token;
+ copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod,
+ &token);
break;
default:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
new file mode 100644
index 0000000000..0c64ae5713
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -0,0 +1,1363 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * TGSI to PowerPC code generation.
+ */
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_PPC)
+
+#include "util/u_debug.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_sse.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi_dump.h"
+#include "tgsi_exec.h"
+#include "tgsi_ppc.h"
+#include "rtasm/rtasm_ppc.h"
+
+
+/**
+ * Since it's pretty much impossible to form PPC vector immediates, load
+ * them from memory here:
+ */
+const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
+ 1.0f, -128.0f, 128.0, 0.0
+};
+
+
+#define FOR_EACH_CHANNEL( CHAN )\
+ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
+
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+ FOR_EACH_CHANNEL( CHAN )\
+ IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+
+/**
+ * How many TGSI temps should be implemented with real PPC vector registers
+ * rather than memory.
+ */
+#define MAX_PPC_TEMPS 3
+
+
+/**
+ * Context/state used during code gen.
+ */
+struct gen_context
+{
+ struct ppc_function *f;
+ int inputs_reg; /**< GP register pointing to input params */
+ int outputs_reg; /**< GP register pointing to output params */
+ int temps_reg; /**< GP register pointing to temporary "registers" */
+ int immed_reg; /**< GP register pointing to immediates buffer */
+ int const_reg; /**< GP register pointing to constants buffer */
+ int builtins_reg; /**< GP register pointint to built-in constants */
+
+ int offset_reg; /**< used to reduce redundant li instructions */
+ int offset_value;
+
+ int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */
+ int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
+
+ /**
+ * Map TGSI temps to PPC vector temps.
+ * We have 32 PPC vector regs. Use 16 of them for storing 4 TGSI temps.
+ * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1].
+ */
+ int temps_map[MAX_PPC_TEMPS][4];
+
+ /**
+ * Cache of src registers.
+ * This is used to avoid redundant load instructions.
+ */
+ struct {
+ struct tgsi_full_src_register src;
+ uint chan;
+ uint vec;
+ } regs[12]; /* 3 src regs, 4 channels */
+ uint num_regs;
+};
+
+
+/**
+ * Initialize code generation context.
+ */
+static void
+init_gen_context(struct gen_context *gen, struct ppc_function *func)
+{
+ uint i;
+
+ memset(gen, 0, sizeof(*gen));
+ gen->f = func;
+ gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */
+ gen->outputs_reg = ppc_reserve_register(func, 4); /* second function param */
+ gen->temps_reg = ppc_reserve_register(func, 5); /* ... */
+ gen->immed_reg = ppc_reserve_register(func, 6);
+ gen->const_reg = ppc_reserve_register(func, 7);
+ gen->builtins_reg = ppc_reserve_register(func, 8);
+ gen->one_vec = -1;
+ gen->bit31_vec = -1;
+ gen->offset_reg = -1;
+ gen->offset_value = -9999999;
+ for (i = 0; i < MAX_PPC_TEMPS; i++) {
+ gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f);
+ gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f);
+ gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f);
+ gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f);
+ }
+}
+
+
+/**
+ * Is the given TGSI register stored as a real PPC vector register?
+ */
+static boolean
+is_ppc_vec_temporary(const struct tgsi_full_src_register *reg)
+{
+ return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
+ reg->SrcRegister.Index < MAX_PPC_TEMPS);
+}
+
+
+/**
+ * Is the given TGSI register stored as a real PPC vector register?
+ */
+static boolean
+is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg)
+{
+ return (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
+ reg->DstRegister.Index < MAX_PPC_TEMPS);
+}
+
+
+
+/**
+ * All PPC vector load/store instructions form an effective address
+ * by adding the contents of two registers. For example:
+ * lvx v2,r8,r9 # v2 = memory[r8 + r9]
+ * stvx v2,r8,r9 # memory[r8 + r9] = v2;
+ * So our lvx/stvx instructions are typically preceded by an 'li' instruction
+ * to load r9 (above) with an immediate (an offset).
+ * This code emits that 'li' instruction, but only if the offset value is
+ * different than the previous 'li'.
+ * This optimization seems to save about 10% in the instruction count.
+ * Note that we need to unconditionally emit an 'li' inside basic blocks
+ * (such as inside loops).
+ */
+static int
+emit_li_offset(struct gen_context *gen, int offset)
+{
+ if (gen->offset_reg <= 0) {
+ /* allocate a GP register for storing load/store offset */
+ gen->offset_reg = ppc_allocate_register(gen->f);
+ }
+
+ /* emit new 'li' if offset is changing */
+ if (gen->offset_value < 0 || gen->offset_value != offset) {
+ gen->offset_value = offset;
+ ppc_li(gen->f, gen->offset_reg, offset);
+ }
+
+ return gen->offset_reg;
+}
+
+
+/**
+ * Forces subsequent emit_li_offset() calls to emit an 'li'.
+ * To be called at the top of basic blocks.
+ */
+static void
+reset_li_offset(struct gen_context *gen)
+{
+ gen->offset_value = -9999999;
+}
+
+
+
+/**
+ * Load the given vector register with {value, value, value, value}.
+ * The value must be in the ppu_builtin_constants[] array.
+ * We wouldn't need this if there was a simple way to load PPC vector
+ * registers with immediate values!
+ */
+static void
+load_constant_vec(struct gen_context *gen, int dst_vec, float value)
+{
+ uint pos;
+ for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) {
+ if (ppc_builtin_constants[pos] == value) {
+ int offset = pos * 4;
+ int offset_reg = emit_li_offset(gen, offset);
+
+ /* Load 4-byte word into vector register.
+ * The vector slot depends on the effective address we load from.
+ * We know that our builtins start at a 16-byte boundary so we
+ * know that 'swizzle' tells us which vector slot will have the
+ * loaded word. The other vector slots will be undefined.
+ */
+ ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg);
+ /* splat word[pos % 4] across the vector reg */
+ ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4);
+ return;
+ }
+ }
+ assert(0 && "Need to add new constant to ppc_builtin_constants array");
+}
+
+
+/**
+ * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}.
+ */
+static int
+gen_one_vec(struct gen_context *gen)
+{
+ if (gen->one_vec < 0) {
+ gen->one_vec = ppc_allocate_vec_register(gen->f);
+ load_constant_vec(gen, gen->one_vec, 1.0f);
+ }
+ return gen->one_vec;
+}
+
+/**
+ * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}.
+ */
+static int
+gen_get_bit31_vec(struct gen_context *gen)
+{
+ if (gen->bit31_vec < 0) {
+ gen->bit31_vec = ppc_allocate_vec_register(gen->f);
+ ppc_vspltisw(gen->f, gen->bit31_vec, -1);
+ ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec);
+ }
+ return gen->bit31_vec;
+}
+
+
+/**
+ * Register fetch. Return PPC vector register with result.
+ */
+static int
+emit_fetch(struct gen_context *gen,
+ const struct tgsi_full_src_register *reg,
+ const unsigned chan_index)
+{
+ uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
+ int dst_vec = -1;
+
+ switch (swizzle) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ switch (reg->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ {
+ int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+ int offset_reg = emit_li_offset(gen, offset);
+ dst_vec = ppc_allocate_vec_register(gen->f);
+ ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (is_ppc_vec_temporary(reg)) {
+ /* use PPC vec register */
+ dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle];
+ }
+ else {
+ /* use memory-based temp register "file" */
+ int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+ int offset_reg = emit_li_offset(gen, offset);
+ dst_vec = ppc_allocate_vec_register(gen->f);
+ ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ {
+ int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+ int offset_reg = emit_li_offset(gen, offset);
+ dst_vec = ppc_allocate_vec_register(gen->f);
+ /* Load 4-byte word into vector register.
+ * The vector slot depends on the effective address we load from.
+ * We know that our immediates start at a 16-byte boundary so we
+ * know that 'swizzle' tells us which vector slot will have the
+ * loaded word. The other vector slots will be undefined.
+ */
+ ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+ /* splat word[swizzle] across the vector reg */
+ ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ {
+ int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+ int offset_reg = emit_li_offset(gen, offset);
+ dst_vec = ppc_allocate_vec_register(gen->f);
+ /* Load 4-byte word into vector register.
+ * The vector slot depends on the effective address we load from.
+ * We know that our constants start at a 16-byte boundary so we
+ * know that 'swizzle' tells us which vector slot will have the
+ * loaded word. The other vector slots will be undefined.
+ */
+ ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg);
+ /* splat word[swizzle] across the vector reg */
+ ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
+ }
+ break;
+ default:
+ assert( 0 );
+ }
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ ppc_vzero(gen->f, dst_vec);
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ {
+ int one_vec = gen_one_vec(gen);
+ dst_vec = ppc_allocate_vec_register(gen->f);
+ ppc_vmove(gen->f, dst_vec, one_vec);
+ }
+ break;
+ default:
+ assert( 0 );
+ }
+
+ assert(dst_vec >= 0);
+
+ {
+ uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
+ if (sign_op != TGSI_UTIL_SIGN_KEEP) {
+ int bit31_vec = gen_get_bit31_vec(gen);
+ int dst_vec2;
+
+ if (is_ppc_vec_temporary(reg)) {
+ /* need to use a new temp */
+ dst_vec2 = ppc_allocate_vec_register(gen->f);
+ }
+ else {
+ dst_vec2 = dst_vec;
+ }
+
+ switch (sign_op) {
+ case TGSI_UTIL_SIGN_CLEAR:
+ /* vec = vec & ~bit31 */
+ ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec);
+ break;
+ case TGSI_UTIL_SIGN_SET:
+ /* vec = vec | bit31 */
+ ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec);
+ break;
+ case TGSI_UTIL_SIGN_TOGGLE:
+ /* vec = vec ^ bit31 */
+ ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec);
+ break;
+ default:
+ assert(0);
+ }
+ return dst_vec2;
+ }
+ }
+
+ return dst_vec;
+}
+
+
+
+/**
+ * Test if two TGSI src registers refer to the same memory location.
+ * We use this to avoid redundant register loads.
+ */
+static boolean
+equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a,
+ const struct tgsi_full_src_register *b, uint chan_b)
+{
+ int swz_a, swz_b;
+ int sign_a, sign_b;
+ if (a->SrcRegister.File != b->SrcRegister.File)
+ return FALSE;
+ if (a->SrcRegister.Index != b->SrcRegister.Index)
+ return FALSE;
+ swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a);
+ swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b);
+ if (swz_a != swz_b)
+ return FALSE;
+ sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a);
+ sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b);
+ if (sign_a != sign_b)
+ return FALSE;
+ return TRUE;
+}
+
+
+/**
+ * Given a TGSI src register and channel index, return the PPC vector
+ * register containing the value. We use a cache to prevent re-loading
+ * the same register multiple times.
+ * \return index of PPC vector register with the desired src operand
+ */
+static int
+get_src_vec(struct gen_context *gen,
+ struct tgsi_full_instruction *inst, int src_reg, uint chan)
+{
+ const const struct tgsi_full_src_register *src =
+ &inst->FullSrcRegisters[src_reg];
+ int vec;
+ uint i;
+
+ /* check the cache */
+ for (i = 0; i < gen->num_regs; i++) {
+ if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) {
+ /* cache hit */
+ assert(gen->regs[i].vec >= 0);
+ return gen->regs[i].vec;
+ }
+ }
+
+ /* cache miss: allocate new vec reg and emit fetch/load code */
+ vec = emit_fetch(gen, src, chan);
+ gen->regs[gen->num_regs].src = *src;
+ gen->regs[gen->num_regs].chan = chan;
+ gen->regs[gen->num_regs].vec = vec;
+ gen->num_regs++;
+
+ assert(gen->num_regs <= Elements(gen->regs));
+
+ assert(vec >= 0);
+
+ return vec;
+}
+
+
+/**
+ * Clear the src operand cache. To be called at the end of each emit function.
+ */
+static void
+release_src_vecs(struct gen_context *gen)
+{
+ uint i;
+ for (i = 0; i < gen->num_regs; i++) {
+ const const struct tgsi_full_src_register src = gen->regs[i].src;
+ if (!is_ppc_vec_temporary(&src)) {
+ ppc_release_vec_register(gen->f, gen->regs[i].vec);
+ }
+ }
+ gen->num_regs = 0;
+}
+
+
+
+static int
+get_dst_vec(struct gen_context *gen,
+ const struct tgsi_full_instruction *inst,
+ unsigned chan_index)
+{
+ const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0];
+
+ if (is_ppc_vec_temporary_dst(reg)) {
+ int vec = gen->temps_map[reg->DstRegister.Index][chan_index];
+ return vec;
+ }
+ else {
+ return ppc_allocate_vec_register(gen->f);
+ }
+}
+
+
+/**
+ * Register store. Store 'src_vec' at location indicated by 'reg'.
+ * \param free_vec Should the src_vec be released when done?
+ */
+static void
+emit_store(struct gen_context *gen,
+ int src_vec,
+ const struct tgsi_full_instruction *inst,
+ unsigned chan_index,
+ boolean free_vec)
+{
+ const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0];
+
+ switch (reg->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ {
+ int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
+ int offset_reg = emit_li_offset(gen, offset);
+ ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (is_ppc_vec_temporary_dst(reg)) {
+ if (!free_vec) {
+ int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index];
+ if (dst_vec != src_vec)
+ ppc_vmove(gen->f, dst_vec, src_vec);
+ }
+ free_vec = FALSE;
+ }
+ else {
+ int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
+ int offset_reg = emit_li_offset(gen, offset);
+ ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
+ }
+ break;
+#if 0
+ case TGSI_FILE_ADDRESS:
+ emit_addrs(
+ func,
+ xmm,
+ reg->DstRegister.Index,
+ chan_index );
+ break;
+#endif
+ default:
+ assert( 0 );
+ }
+
+#if 0
+ switch( inst->Instruction.Saturate ) {
+ case TGSI_SAT_NONE:
+ break;
+
+ case TGSI_SAT_ZERO_ONE:
+ /* assert( 0 ); */
+ break;
+
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ assert( 0 );
+ break;
+ }
+#endif
+
+ if (free_vec)
+ ppc_release_vec_register(gen->f, src_vec);
+}
+
+
+static void
+emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ int v0, v1;
+ uint chan_index;
+
+ v0 = get_src_vec(gen, inst, 0, CHAN_X);
+ v1 = ppc_allocate_vec_register(gen->f);
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_RSQ:
+ /* v1 = 1.0 / sqrt(v0) */
+ ppc_vrsqrtefp(gen->f, v1, v0);
+ break;
+ case TGSI_OPCODE_RCP:
+ /* v1 = 1.0 / v0 */
+ ppc_vrefp(gen->f, v1, v0);
+ break;
+ default:
+ assert(0);
+ }
+
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ emit_store(gen, v1, inst, chan_index, FALSE);
+ }
+
+ release_src_vecs(gen);
+ ppc_release_vec_register(gen->f, v1);
+}
+
+
+static void
+emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ uint chan_index;
+
+ FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+ int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */
+ int v1 = get_dst_vec(gen, inst, chan_index);
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ /* turn off the most significant bit of each vector float word */
+ {
+ int bit31_vec = gen_get_bit31_vec(gen);
+ ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */
+ }
+ break;
+ case TGSI_OPCODE_FLOOR:
+ ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */
+ break;
+ case TGSI_OPCODE_FRAC:
+ ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */
+ ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */
+ break;
+ case TGSI_OPCODE_EXPBASE2:
+ ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */
+ break;
+ case TGSI_OPCODE_LOGBASE2:
+ /* XXX this may be broken! */
+ ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */
+ break;
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SWZ:
+ if (v0 != v1)
+ ppc_vmove(gen->f, v1, v0);
+ break;
+ default:
+ assert(0);
+ }
+ emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */
+ }
+
+ release_src_vecs(gen);
+}
+
+
+static void
+emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ int zero_vec = -1;
+ uint chan;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
+ zero_vec = ppc_allocate_vec_register(gen->f);
+ ppc_vzero(gen->f, zero_vec);
+ }
+
+ FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
+ /* fetch src operands */
+ int v0 = get_src_vec(gen, inst, 0, chan);
+ int v1 = get_src_vec(gen, inst, 1, chan);
+ int v2 = get_dst_vec(gen, inst, chan);
+
+ /* emit binop */
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ ppc_vaddfp(gen->f, v2, v0, v1);
+ break;
+ case TGSI_OPCODE_SUB:
+ ppc_vsubfp(gen->f, v2, v0, v1);
+ break;
+ case TGSI_OPCODE_MUL:
+ ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec);
+ break;
+ case TGSI_OPCODE_MIN:
+ ppc_vminfp(gen->f, v2, v0, v1);
+ break;
+ case TGSI_OPCODE_MAX:
+ ppc_vmaxfp(gen->f, v2, v0, v1);
+ break;
+ default:
+ assert(0);
+ }
+
+ /* store v2 */
+ emit_store(gen, v2, inst, chan, TRUE);
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MUL)
+ ppc_release_vec_register(gen->f, zero_vec);
+
+ release_src_vecs(gen);
+}
+
+
+static void
+emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ uint chan;
+
+ FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
+ /* fetch src operands */
+ int v0 = get_src_vec(gen, inst, 0, chan);
+ int v1 = get_src_vec(gen, inst, 1, chan);
+ int v2 = get_src_vec(gen, inst, 2, chan);
+ int v3 = get_dst_vec(gen, inst, chan);
+
+ /* emit ALU */
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_MAD:
+ ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */
+ break;
+ case TGSI_OPCODE_LRP:
+ ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */
+ ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */
+ break;
+ default:
+ assert(0);
+ }
+
+ /* store v3 */
+ emit_store(gen, v3, inst, chan, TRUE);
+ }
+
+ release_src_vecs(gen);
+}
+
+
+/**
+ * Vector comparisons, resulting in 1.0 or 0.0 values.
+ */
+static void
+emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ uint chan;
+ int one_vec = gen_one_vec(gen);
+
+ FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
+ /* fetch src operands */
+ int v0 = get_src_vec(gen, inst, 0, chan);
+ int v1 = get_src_vec(gen, inst, 1, chan);
+ int v2 = get_dst_vec(gen, inst, chan);
+ boolean complement = FALSE;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_SNE:
+ complement = TRUE;
+ /* fall-through */
+ case TGSI_OPCODE_SEQ:
+ ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */
+ break;
+
+ case TGSI_OPCODE_SGE:
+ complement = TRUE;
+ /* fall-through */
+ case TGSI_OPCODE_SLT:
+ ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */
+ break;
+
+ case TGSI_OPCODE_SLE:
+ complement = TRUE;
+ /* fall-through */
+ case TGSI_OPCODE_SGT:
+ ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */
+ break;
+ default:
+ assert(0);
+ }
+
+ /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */
+
+ if (complement)
+ ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */
+ else
+ ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */
+
+ /* store v2 */
+ emit_store(gen, v2, inst, chan, TRUE);
+ }
+
+ release_src_vecs(gen);
+}
+
+
+static void
+emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ int v0, v1, v2;
+ uint chan_index;
+
+ v2 = ppc_allocate_vec_register(gen->f);
+
+ ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */
+
+ v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */
+ v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */
+ ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
+
+ v0 = get_src_vec(gen, inst, 0, CHAN_Y); /* v0 = src0.YYYY */
+ v1 = get_src_vec(gen, inst, 1, CHAN_Y); /* v1 = src1.YYYY */
+ ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
+
+ v0 = get_src_vec(gen, inst, 0, CHAN_Z); /* v0 = src0.ZZZZ */
+ v1 = get_src_vec(gen, inst, 1, CHAN_Z); /* v1 = src1.ZZZZ */
+ ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) {
+ v0 = get_src_vec(gen, inst, 0, CHAN_W); /* v0 = src0.WWWW */
+ v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */
+ ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
+ }
+ else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) {
+ v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */
+ ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */
+ }
+
+ FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+ emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */
+ }
+
+ release_src_vecs(gen);
+
+ ppc_release_vec_register(gen->f, v2);
+}
+
+
+/** Approximation for vr = pow(va, vb) */
+static void
+ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
+{
+ /* pow(a,b) ~= exp2(log2(a) * b) */
+ int t_vec = ppc_allocate_vec_register(f);
+ int zero_vec = ppc_allocate_vec_register(f);
+
+ ppc_vzero(f, zero_vec);
+
+ ppc_vlogefp(f, t_vec, va); /* t = log2(va) */
+ ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */
+ ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */
+
+ ppc_release_vec_register(f, t_vec);
+ ppc_release_vec_register(f, zero_vec);
+}
+
+
+static void
+emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ int one_vec = gen_one_vec(gen);
+
+ /* Compute X */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ emit_store(gen, one_vec, inst, CHAN_X, FALSE);
+ }
+
+ /* Compute Y, Z */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ int x_vec;
+ int zero_vec = ppc_allocate_vec_register(gen->f);
+
+ x_vec = get_src_vec(gen, inst, 0, CHAN_X); /* x_vec = src[0].x */
+
+ ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */
+ ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */
+
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ emit_store(gen, x_vec, inst, CHAN_Y, FALSE);
+ }
+
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ int y_vec, w_vec;
+ int z_vec = ppc_allocate_vec_register(gen->f);
+ int pow_vec = ppc_allocate_vec_register(gen->f);
+ int pos_vec = ppc_allocate_vec_register(gen->f);
+ int p128_vec = ppc_allocate_vec_register(gen->f);
+ int n128_vec = ppc_allocate_vec_register(gen->f);
+
+ y_vec = get_src_vec(gen, inst, 0, CHAN_Y); /* y_vec = src[0].y */
+ ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */
+
+ w_vec = get_src_vec(gen, inst, 0, CHAN_W); /* w_vec = src[0].w */
+
+ /* clamp W to [-128, 128] */
+ load_constant_vec(gen, p128_vec, 128.0f);
+ load_constant_vec(gen, n128_vec, -128.0f);
+ ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */
+ ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */
+
+ /* if temp.x > 0
+ * z = pow(tmp.y, tmp.w)
+ * else
+ * z = 0.0
+ */
+ ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */
+ ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */
+ ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */
+
+ emit_store(gen, z_vec, inst, CHAN_Z, FALSE);
+
+ ppc_release_vec_register(gen->f, z_vec);
+ ppc_release_vec_register(gen->f, pow_vec);
+ ppc_release_vec_register(gen->f, pos_vec);
+ ppc_release_vec_register(gen->f, p128_vec);
+ ppc_release_vec_register(gen->f, n128_vec);
+ }
+
+ ppc_release_vec_register(gen->f, zero_vec);
+ }
+
+ /* Compute W */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ emit_store(gen, one_vec, inst, CHAN_W, FALSE);
+ }
+
+ release_src_vecs(gen);
+}
+
+
+static void
+emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ const int one_vec = gen_one_vec(gen);
+ int src_vec;
+
+ /* get src arg */
+ src_vec = get_src_vec(gen, inst, 0, CHAN_X);
+
+ /* Compute X = 2^floor(src) */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ int dst_vec = get_dst_vec(gen, inst, CHAN_X);
+ int tmp_vec = ppc_allocate_vec_register(gen->f);
+ ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */
+ ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */
+ emit_store(gen, dst_vec, inst, CHAN_X, TRUE);
+ ppc_release_vec_register(gen->f, tmp_vec);
+ }
+
+ /* Compute Y = src - floor(src) */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ int dst_vec = get_dst_vec(gen, inst, CHAN_Y);
+ int tmp_vec = ppc_allocate_vec_register(gen->f);
+ ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */
+ ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec); /* dst = src - tmp */
+ emit_store(gen, dst_vec, inst, CHAN_Y, TRUE);
+ ppc_release_vec_register(gen->f, tmp_vec);
+ }
+
+ /* Compute Z = RoughApprox2ToX(src) */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ int dst_vec = get_dst_vec(gen, inst, CHAN_Z);
+ ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */
+ emit_store(gen, dst_vec, inst, CHAN_Z, TRUE);
+ }
+
+ /* Compute W = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ emit_store(gen, one_vec, inst, CHAN_W, FALSE);
+ }
+
+ release_src_vecs(gen);
+}
+
+
+static void
+emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ const int bit31_vec = gen_get_bit31_vec(gen);
+ const int one_vec = gen_one_vec(gen);
+ int src_vec, abs_vec;
+
+ /* get src arg */
+ src_vec = get_src_vec(gen, inst, 0, CHAN_X);
+
+ /* compute abs(src) */
+ abs_vec = ppc_allocate_vec_register(gen->f);
+ ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */
+
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) &&
+ IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+
+ /* compute tmp = floor(log2(abs)) */
+ int tmp_vec = ppc_allocate_vec_register(gen->f);
+ ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */
+ ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */
+
+ /* Compute X = tmp */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ emit_store(gen, tmp_vec, inst, CHAN_X, FALSE);
+ }
+
+ /* Compute Y = abs / 2^tmp */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ const int zero_vec = ppc_allocate_vec_register(gen->f);
+ ppc_vzero(gen->f, zero_vec);
+ ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */
+ ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */
+ /* tmp = abs * tmp + zero */
+ ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec);
+ emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE);
+ ppc_release_vec_register(gen->f, zero_vec);
+ }
+
+ ppc_release_vec_register(gen->f, tmp_vec);
+ }
+
+ /* Compute Z = RoughApproxLog2(abs) */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ int dst_vec = get_dst_vec(gen, inst, CHAN_Z);
+ ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */
+ emit_store(gen, dst_vec, inst, CHAN_Z, TRUE);
+ }
+
+ /* Compute W = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ emit_store(gen, one_vec, inst, CHAN_W, FALSE);
+ }
+
+ ppc_release_vec_register(gen->f, abs_vec);
+ release_src_vecs(gen);
+}
+
+
+static void
+emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ int s0_vec = get_src_vec(gen, inst, 0, CHAN_X);
+ int s1_vec = get_src_vec(gen, inst, 1, CHAN_X);
+ int pow_vec = ppc_allocate_vec_register(gen->f);
+ int chan;
+
+ ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec);
+
+ FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
+ emit_store(gen, pow_vec, inst, chan, FALSE);
+ }
+
+ ppc_release_vec_register(gen->f, pow_vec);
+
+ release_src_vecs(gen);
+}
+
+
+static void
+emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+ int x0_vec, y0_vec, z0_vec;
+ int x1_vec, y1_vec, z1_vec;
+ int zero_vec, tmp_vec;
+ int tmp2_vec;
+
+ zero_vec = ppc_allocate_vec_register(gen->f);
+ ppc_vzero(gen->f, zero_vec);
+
+ tmp_vec = ppc_allocate_vec_register(gen->f);
+ tmp2_vec = ppc_allocate_vec_register(gen->f);
+
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ x0_vec = get_src_vec(gen, inst, 0, CHAN_X);
+ x1_vec = get_src_vec(gen, inst, 1, CHAN_X);
+ }
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ y0_vec = get_src_vec(gen, inst, 0, CHAN_Y);
+ y1_vec = get_src_vec(gen, inst, 1, CHAN_Y);
+ }
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ z0_vec = get_src_vec(gen, inst, 0, CHAN_Z);
+ z1_vec = get_src_vec(gen, inst, 1, CHAN_Z);
+ }
+
+ IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) {
+ /* tmp = y0 * z1 */
+ ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec);
+ /* tmp = tmp - z0 * y1*/
+ ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec);
+ emit_store(gen, tmp_vec, inst, CHAN_X, FALSE);
+ }
+ IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) {
+ /* tmp = z0 * x1 */
+ ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec);
+ /* tmp = tmp - x0 * z1 */
+ ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec);
+ emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE);
+ }
+ IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) {
+ /* tmp = x0 * y1 */
+ ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec);
+ /* tmp = tmp - y0 * x1 */
+ ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec);
+ emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE);
+ }
+ /* W is undefined */
+
+ ppc_release_vec_register(gen->f, tmp_vec);
+ ppc_release_vec_register(gen->f, zero_vec);
+ release_src_vecs(gen);
+}
+
+static int
+emit_instruction(struct gen_context *gen,
+ struct tgsi_full_instruction *inst)
+{
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SWZ:
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_FLOOR:
+ case TGSI_OPCODE_FRAC:
+ case TGSI_OPCODE_EXPBASE2:
+ case TGSI_OPCODE_LOGBASE2:
+ emit_unaryop(gen, inst);
+ break;
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_RCP:
+ emit_scalar_unaryop(gen, inst);
+ break;
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MAX:
+ emit_binop(gen, inst);
+ break;
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SGE:
+ emit_inequality(gen, inst);
+ break;
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_LRP:
+ emit_triop(gen, inst);
+ break;
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ emit_dotprod(gen, inst);
+ break;
+ case TGSI_OPCODE_LIT:
+ emit_lit(gen, inst);
+ break;
+ case TGSI_OPCODE_LOG:
+ emit_log(gen, inst);
+ break;
+ case TGSI_OPCODE_EXP:
+ emit_exp(gen, inst);
+ break;
+ case TGSI_OPCODE_POW:
+ emit_pow(gen, inst);
+ break;
+ case TGSI_OPCODE_XPD:
+ emit_xpd(gen, inst);
+ break;
+ case TGSI_OPCODE_END:
+ /* normal end */
+ return 1;
+ default:
+ return 0;
+ }
+ return 1;
+}
+
+
+static void
+emit_declaration(
+ struct ppc_function *func,
+ struct tgsi_full_declaration *decl )
+{
+ if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+#if 0
+ unsigned first, last, mask;
+ unsigned i, j;
+
+ first = decl->DeclarationRange.First;
+ last = decl->DeclarationRange.Last;
+ mask = decl->Declaration.UsageMask;
+
+ for( i = first; i <= last; i++ ) {
+ for( j = 0; j < NUM_CHANNELS; j++ ) {
+ if( mask & (1 << j) ) {
+ switch( decl->Declaration.Interpolate ) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ emit_coef_a0( func, 0, i, j );
+ emit_inputs( func, 0, i, j );
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
+ emit_coef_dadx( func, 1, i, j );
+ emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
+ emit_coef_dady( func, 3, i, j );
+ emit_mul( func, 0, 1 ); /* x * dadx */
+ emit_coef_a0( func, 4, i, j );
+ emit_mul( func, 2, 3 ); /* y * dady */
+ emit_add( func, 0, 4 ); /* x * dadx + a0 */
+ emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
+ emit_inputs( func, 0, i, j );
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
+ emit_coef_dadx( func, 1, i, j );
+ emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
+ emit_coef_dady( func, 3, i, j );
+ emit_mul( func, 0, 1 ); /* x * dadx */
+ emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
+ emit_coef_a0( func, 5, i, j );
+ emit_rcp( func, 4, 4 ); /* 1.0 / w */
+ emit_mul( func, 2, 3 ); /* y * dady */
+ emit_add( func, 0, 5 ); /* x * dadx + a0 */
+ emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
+ emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
+ emit_inputs( func, 0, i, j );
+ break;
+
+ default:
+ assert( 0 );
+ break;
+ }
+ }
+ }
+ }
+#endif
+ }
+}
+
+
+
+static void
+emit_prologue(struct ppc_function *func)
+{
+ /* XXX set up stack frame */
+}
+
+
+static void
+emit_epilogue(struct ppc_function *func)
+{
+ ppc_comment(func, -4, "Epilogue:");
+ ppc_return(func);
+ /* XXX restore prev stack frame */
+#if 0
+ debug_printf("PPC: Emitted %u instructions\n", func->num_inst);
+#endif
+}
+
+
+
+/**
+ * Translate a TGSI vertex/fragment shader to PPC code.
+ *
+ * \param tokens the TGSI input shader
+ * \param func the output PPC code/function
+ * \param immediates buffer to place immediates, later passed to PPC func
+ * \return TRUE for success, FALSE if translation failed
+ */
+boolean
+tgsi_emit_ppc(const struct tgsi_token *tokens,
+ struct ppc_function *func,
+ float (*immediates)[4],
+ boolean do_swizzles )
+{
+ static int use_ppc_asm = -1;
+ struct tgsi_parse_context parse;
+ /*boolean instruction_phase = FALSE;*/
+ unsigned ok = 1;
+ uint num_immediates = 0;
+ struct gen_context gen;
+ uint ic = 0;
+
+ if (use_ppc_asm < 0) {
+ /* If GALLIUM_NOPPC is set, don't use PPC codegen */
+ use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE);
+ }
+ if (!use_ppc_asm)
+ return FALSE;
+
+ if (0) {
+ debug_printf("\n********* TGSI->PPC ********\n");
+ tgsi_dump(tokens, 0);
+ }
+
+ util_init_math();
+
+ init_gen_context(&gen, func);
+
+ emit_prologue(func);
+
+ tgsi_parse_init( &parse, tokens );
+
+ while (!tgsi_parse_end_of_tokens(&parse) && ok) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+ emit_declaration(func, &parse.FullToken.FullDeclaration );
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (func->print) {
+ _debug_printf("# ");
+ ic++;
+ tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
+ }
+
+ ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);
+
+ if (!ok) {
+ debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n",
+ parse.FullToken.FullInstruction.Instruction.Opcode,
+ parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
+ "vertex shader" : "fragment shader");
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* splat each immediate component into a float[4] vector for SoA */
+ {
+ const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+ uint i;
+ assert(size <= 4);
+ assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
+ for (i = 0; i < size; i++) {
+ immediates[num_immediates][i] =
+ parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ }
+ num_immediates++;
+ }
+ break;
+
+ default:
+ ok = 0;
+ assert( 0 );
+ }
+ }
+
+ emit_epilogue(func);
+
+ tgsi_parse_free( &parse );
+
+ if (ppc_num_instructions(func) == 0) {
+ /* ran out of memory for instructions */
+ ok = FALSE;
+ }
+
+ if (!ok)
+ debug_printf("TGSI->PPC translation failed\n");
+
+ return ok;
+}
+
+#endif /* PIPE_ARCH_PPC */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h
new file mode 100644
index 0000000000..829ec075e7
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h
@@ -0,0 +1,51 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TGSI_PPC_H
+#define TGSI_PPC_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+struct tgsi_token;
+struct ppc_function;
+
+extern const float ppc_builtin_constants[];
+
+
+boolean
+tgsi_emit_ppc(const struct tgsi_token *tokens,
+ struct ppc_function *function,
+ float (*immediates)[4],
+ boolean do_swizzles);
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_PPC_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index bc7b941b78..76e773da91 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_sanity.h"
#include "tgsi_info.h"
#include "tgsi_iterate.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 1239f6c076..c535788819 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -43,6 +43,9 @@
/**
+ * Scan the given TGSI shader to collect information such as number of
+ * registers used, special instructions used, etc.
+ * \return info the result of the scan
*/
void
tgsi_scan_shader(const struct tgsi_token *tokens,
@@ -115,7 +118,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
{
const struct tgsi_full_declaration *fulldecl
= &parse.FullToken.FullDeclaration;
- uint file = fulldecl->Declaration.File;
+ const uint file = fulldecl->Declaration.File;
uint reg;
for (reg = fulldecl->DeclarationRange.First;
reg <= fulldecl->DeclarationRange.Last;
@@ -131,8 +134,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
info->num_inputs++;
}
-
- if (file == TGSI_FILE_OUTPUT) {
+ else if (file == TGSI_FILE_OUTPUT) {
info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
info->num_outputs++;
@@ -149,7 +151,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- info->immediate_count++;
+ {
+ uint reg = info->immediate_count++;
+ uint file = TGSI_FILE_IMMEDIATE;
+
+ info->file_mask[file] |= (1 << reg);
+ info->file_count[file]++;
+ info->file_max[file] = MAX2(info->file_max[file], (int)reg);
+ }
break;
default:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index ff869c8312..d70bcd03c5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -25,9 +25,16 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_X86)
+
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
+#if defined(PIPE_ARCH_SSE)
+#include "util/u_sse.h"
+#endif
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
@@ -35,8 +42,6 @@
#include "rtasm/rtasm_x86sse.h"
-#ifdef PIPE_ARCH_X86
-
/* for 1/sqrt()
*
* This costs about 100fps (close to 10%) in gears:
@@ -509,10 +514,31 @@ emit_coef_dady(
* Function call helpers.
*/
+/**
+ * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be
+ * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee
+ * that the stack pointer is 16 byte aligned, as expected.
+ */
static void
-emit_push_gp(
- struct x86_function *func )
+emit_func_call_dst(
+ struct x86_function *func,
+ unsigned xmm_save,
+ unsigned xmm_dst,
+ void (PIPE_CDECL *code)() )
{
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+ unsigned i, n;
+ unsigned xmm_mask;
+
+ /* Bitmask of the xmm registers to save */
+ xmm_mask = (1 << xmm_save) - 1;
+ xmm_mask &= ~(1 << xmm_dst);
+
+ sse_movaps(
+ func,
+ get_temp( TEMP_R0, 0 ),
+ make_xmm( xmm_dst ) );
+
x86_push(
func,
x86_make_reg( file_REG32, reg_AX) );
@@ -522,12 +548,49 @@ emit_push_gp(
x86_push(
func,
x86_make_reg( file_REG32, reg_DX) );
-}
+
+ for(i = 0, n = 0; i < 8; ++i)
+ if(xmm_mask & (1 << i))
+ ++n;
+
+ x86_sub_imm(
+ func,
+ x86_make_reg( file_REG32, reg_SP ),
+ n*16);
+
+ for(i = 0, n = 0; i < 8; ++i)
+ if(xmm_mask & (1 << i)) {
+ sse_movups(
+ func,
+ x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
+ make_xmm( i ) );
+ ++n;
+ }
+
+ x86_lea(
+ func,
+ ecx,
+ get_temp( TEMP_R0, 0 ) );
+
+ x86_push( func, ecx );
+ x86_mov_reg_imm( func, ecx, (unsigned long) code );
+ x86_call( func, ecx );
+ x86_pop(func, ecx );
+
+ for(i = 0, n = 0; i < 8; ++i)
+ if(xmm_mask & (1 << i)) {
+ sse_movups(
+ func,
+ make_xmm( i ),
+ x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) );
+ ++n;
+ }
+
+ x86_add_imm(
+ func,
+ x86_make_reg( file_REG32, reg_SP ),
+ n*16);
-static void
-x86_pop_gp(
- struct x86_function *func )
-{
/* Restore GP registers in a reverse order.
*/
x86_pop(
@@ -539,39 +602,6 @@ x86_pop_gp(
x86_pop(
func,
x86_make_reg( file_REG32, reg_AX) );
-}
-
-static void
-emit_func_call_dst(
- struct x86_function *func,
- unsigned xmm_dst,
- void (PIPE_CDECL *code)() )
-{
- sse_movaps(
- func,
- get_temp( TEMP_R0, 0 ),
- make_xmm( xmm_dst ) );
-
- emit_push_gp(
- func );
-
- {
- struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-
- x86_lea(
- func,
- ecx,
- get_temp( TEMP_R0, 0 ) );
-
- x86_push( func, ecx );
- x86_mov_reg_imm( func, ecx, (unsigned long) code );
- x86_call( func, ecx );
- x86_pop(func, ecx );
- }
-
-
- x86_pop_gp(
- func );
sse_movaps(
func,
@@ -582,6 +612,7 @@ emit_func_call_dst(
static void
emit_func_call_dst_src(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst,
unsigned xmm_src,
void (PIPE_CDECL *code)() )
@@ -593,10 +624,119 @@ emit_func_call_dst_src(
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
code );
}
+
+#if defined(PIPE_ARCH_SSE)
+
+/*
+ * Fast SSE2 implementation of special math functions.
+ */
+
+#define POLY0(x, c0) _mm_set1_ps(c0)
+#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0))
+#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0))
+#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0))
+#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0))
+#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0))
+
+#define EXP_POLY_DEGREE 3
+#define LOG_POLY_DEGREE 5
+
+/**
+ * See http://www.devmaster.net/forums/showthread.php?p=43580
+ */
+static INLINE __m128
+exp2f4(__m128 x)
+{
+ __m128i ipart;
+ __m128 fpart, expipart, expfpart;
+
+ x = _mm_min_ps(x, _mm_set1_ps( 129.00000f));
+ x = _mm_max_ps(x, _mm_set1_ps(-126.99999f));
+
+ /* ipart = int(x - 0.5) */
+ ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f)));
+
+ /* fpart = x - ipart */
+ fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart));
+
+ /* expipart = (float) (1 << ipart) */
+ expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23));
+
+ /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */
+#if EXP_POLY_DEGREE == 5
+ expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
+#elif EXP_POLY_DEGREE == 4
+ expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
+#elif EXP_POLY_DEGREE == 3
+ expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
+#elif EXP_POLY_DEGREE == 2
+ expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
+#else
+#error
+#endif
+
+ return _mm_mul_ps(expipart, expfpart);
+}
+
+
+/**
+ * See http://www.devmaster.net/forums/showthread.php?p=43580
+ */
+static INLINE __m128
+log2f4(__m128 x)
+{
+ __m128i expmask = _mm_set1_epi32(0x7f800000);
+ __m128i mantmask = _mm_set1_epi32(0x007fffff);
+ __m128 one = _mm_set1_ps(1.0f);
+
+ __m128i i = _mm_castps_si128(x);
+
+ /* exp = (float) exponent(x) */
+ __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127)));
+
+ /* mant = (float) mantissa(x) */
+ __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one);
+
+ __m128 logmant;
+
+ /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
+ * These coefficients can be generate with
+ * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
+ */
+#if LOG_POLY_DEGREE == 6
+ logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f);
+#elif LOG_POLY_DEGREE == 5
+ logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
+#elif LOG_POLY_DEGREE == 4
+ logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
+#elif LOG_POLY_DEGREE == 3
+ logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
+#else
+#error
+#endif
+
+ /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+ logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one));
+
+ return _mm_add_ps(logmant, exp);
+}
+
+
+static INLINE __m128
+powf4(__m128 x, __m128 y)
+{
+ return exp2f4(_mm_mul_ps(log2f4(x), y));
+}
+
+#endif /* PIPE_ARCH_SSE */
+
+
+
/**
* Low-level instruction translators.
*/
@@ -639,38 +779,42 @@ cos4f(
static void
emit_cos(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
cos4f );
}
static void PIPE_CDECL
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
+__attribute__((force_align_arg_pointer))
+#endif
ex24f(
float *store )
{
-#if FAST_MATH
+#if defined(PIPE_ARCH_SSE)
+ _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) ));
+#else
store[0] = util_fast_exp2( store[0] );
store[1] = util_fast_exp2( store[1] );
store[2] = util_fast_exp2( store[2] );
store[3] = util_fast_exp2( store[3] );
-#else
- store[0] = powf( 2.0f, store[0] );
- store[1] = powf( 2.0f, store[1] );
- store[2] = powf( 2.0f, store[2] );
- store[3] = powf( 2.0f, store[3] );
#endif
}
static void
emit_ex2(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
ex24f );
}
@@ -710,10 +854,12 @@ flr4f(
static void
emit_flr(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
flr4f );
}
@@ -731,31 +877,42 @@ frc4f(
static void
emit_frc(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
frc4f );
}
static void PIPE_CDECL
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
+__attribute__((force_align_arg_pointer))
+#endif
lg24f(
float *store )
{
+#if defined(PIPE_ARCH_SSE)
+ _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) ));
+#else
store[0] = util_fast_log2( store[0] );
store[1] = util_fast_log2( store[1] );
store[2] = util_fast_log2( store[2] );
store[3] = util_fast_log2( store[3] );
+#endif
}
static void
emit_lg2(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
lg24f );
}
@@ -797,30 +954,32 @@ emit_neg(
}
static void PIPE_CDECL
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
+__attribute__((force_align_arg_pointer))
+#endif
pow4f(
float *store )
{
-#if FAST_MATH
+#if defined(PIPE_ARCH_SSE)
+ _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) ));
+#else
store[0] = util_fast_pow( store[0], store[4] );
store[1] = util_fast_pow( store[1], store[5] );
store[2] = util_fast_pow( store[2], store[6] );
store[3] = util_fast_pow( store[3], store[7] );
-#else
- store[0] = powf( store[0], store[4] );
- store[1] = powf( store[1], store[5] );
- store[2] = powf( store[2], store[6] );
- store[3] = powf( store[3], store[7] );
#endif
}
static void
emit_pow(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst,
unsigned xmm_src )
{
emit_func_call_dst_src(
func,
+ xmm_save,
xmm_dst,
xmm_src,
pow4f );
@@ -855,10 +1014,12 @@ rnd4f(
static void
emit_rnd(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
rnd4f );
}
@@ -935,10 +1096,12 @@ sgn4f(
static void
emit_sgn(
struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
sgn4f );
}
@@ -955,10 +1118,12 @@ sin4f(
static void
emit_sin (struct x86_function *func,
+ unsigned xmm_save,
unsigned xmm_dst)
{
emit_func_call_dst(
func,
+ xmm_save,
xmm_dst,
sin4f );
}
@@ -1378,7 +1543,7 @@ emit_instruction(
get_temp(
TGSI_EXEC_TEMP_MINUS_128_I,
TGSI_EXEC_TEMP_MINUS_128_C ) );
- emit_pow( func, 1, 2 );
+ emit_pow( func, 3, 1, 2 );
FETCH( func, *inst, 0, 0, CHAN_X );
sse_xorps(
func,
@@ -1410,6 +1575,7 @@ emit_instruction(
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_abs( func, 0 );
emit_rsqrt( func, 1, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 1, 0, chan_index );
@@ -1424,11 +1590,11 @@ emit_instruction(
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
emit_MOV( func, 1, 0 );
- emit_flr( func, 1 );
+ emit_flr( func, 2, 1 );
/* dst.x = ex2(floor(src.x)) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
emit_MOV( func, 2, 1 );
- emit_ex2( func, 2 );
+ emit_ex2( func, 3, 2 );
STORE( func, *inst, 2, 0, CHAN_X );
}
/* dst.y = src.x - floor(src.x) */
@@ -1440,7 +1606,7 @@ emit_instruction(
}
/* dst.z = ex2(src.x) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- emit_ex2( func, 0 );
+ emit_ex2( func, 3, 0 );
STORE( func, *inst, 0, 0, CHAN_Z );
}
}
@@ -1458,21 +1624,21 @@ emit_instruction(
FETCH( func, *inst, 0, 0, CHAN_X );
emit_abs( func, 0 );
emit_MOV( func, 1, 0 );
- emit_lg2( func, 1 );
+ emit_lg2( func, 2, 1 );
/* dst.z = lg2(abs(src.x)) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
STORE( func, *inst, 1, 0, CHAN_Z );
}
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- emit_flr( func, 1 );
+ emit_flr( func, 2, 1 );
/* dst.x = floor(lg2(abs(src.x))) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
STORE( func, *inst, 1, 0, CHAN_X );
}
/* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- emit_ex2( func, 1 );
+ emit_ex2( func, 2, 1 );
emit_rcp( func, 1, 1 );
emit_mul( func, 0, 1 );
STORE( func, *inst, 0, 0, CHAN_Y );
@@ -1647,7 +1813,18 @@ emit_instruction(
case TGSI_OPCODE_DOT2ADD:
/* TGSI_OPCODE_DP2A */
- return 0;
+ FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
+ FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
+ emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
+ FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */
+ FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */
+ emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */
+ emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
+ FETCH( func, *inst, 1, 2, CHAN_X ); /* xmm1 = src[2].x */
+ emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */
+ }
break;
case TGSI_OPCODE_INDEX:
@@ -1662,7 +1839,7 @@ emit_instruction(
/* TGSI_OPCODE_FRC */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_frc( func, 0 );
+ emit_frc( func, 0, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
break;
@@ -1675,7 +1852,7 @@ emit_instruction(
/* TGSI_OPCODE_FLR */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_flr( func, 0 );
+ emit_flr( func, 0, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
break;
@@ -1683,7 +1860,7 @@ emit_instruction(
case TGSI_OPCODE_ROUND:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_rnd( func, 0 );
+ emit_rnd( func, 0, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
break;
@@ -1691,7 +1868,7 @@ emit_instruction(
case TGSI_OPCODE_EXPBASE2:
/* TGSI_OPCODE_EX2 */
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_ex2( func, 0 );
+ emit_ex2( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1700,7 +1877,7 @@ emit_instruction(
case TGSI_OPCODE_LOGBASE2:
/* TGSI_OPCODE_LG2 */
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_lg2( func, 0 );
+ emit_lg2( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1710,7 +1887,7 @@ emit_instruction(
/* TGSI_OPCODE_POW */
FETCH( func, *inst, 0, 0, CHAN_X );
FETCH( func, *inst, 1, 1, CHAN_X );
- emit_pow( func, 0, 1 );
+ emit_pow( func, 0, 0, 1 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1801,7 +1978,7 @@ emit_instruction(
case TGSI_OPCODE_COS:
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_cos( func, 0 );
+ emit_cos( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1860,7 +2037,7 @@ emit_instruction(
case TGSI_OPCODE_SIN:
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_sin( func, 0 );
+ emit_sin( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1927,7 +2104,7 @@ emit_instruction(
case TGSI_OPCODE_ARR:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_rnd( func, 0 );
+ emit_rnd( func, 0, 0 );
emit_f2it( func, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1952,7 +2129,7 @@ emit_instruction(
/* TGSI_OPCODE_SGN */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
- emit_sgn( func, 0 );
+ emit_sgn( func, 0, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
break;
@@ -1964,12 +2141,12 @@ emit_instruction(
case TGSI_OPCODE_SCS:
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_cos( func, 0 );
+ emit_cos( func, 0, 0 );
STORE( func, *inst, 0, 0, CHAN_X );
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_sin( func, 0 );
+ emit_sin( func, 0, 0 );
STORE( func, *inst, 0, 0, CHAN_Y );
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
@@ -1995,7 +2172,39 @@ emit_instruction(
break;
case TGSI_OPCODE_NRM:
- return 0;
+ /* fall-through */
+ case TGSI_OPCODE_NRM4:
+ /* 3 or 4-component normalization */
+ {
+ uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
+ /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */
+ FETCH( func, *inst, 4, 0, CHAN_X ); /* xmm4 = src[0].x */
+ FETCH( func, *inst, 5, 0, CHAN_Y ); /* xmm5 = src[0].y */
+ FETCH( func, *inst, 6, 0, CHAN_Z ); /* xmm6 = src[0].z */
+ if (dims == 4) {
+ FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */
+ }
+ emit_MOV( func, 0, 4 ); /* xmm0 = xmm3 */
+ emit_mul( func, 0, 4 ); /* xmm0 *= xmm3 */
+ emit_MOV( func, 1, 5 ); /* xmm1 = xmm4 */
+ emit_mul( func, 1, 5 ); /* xmm1 *= xmm4 */
+ emit_add( func, 0, 1 ); /* xmm0 += xmm1 */
+ emit_MOV( func, 1, 6 ); /* xmm1 = xmm5 */
+ emit_mul( func, 1, 6 ); /* xmm1 *= xmm5 */
+ emit_add( func, 0, 1 ); /* xmm0 += xmm1 */
+ if (dims == 4) {
+ emit_MOV( func, 1, 7 ); /* xmm1 = xmm7 */
+ emit_mul( func, 1, 7 ); /* xmm1 *= xmm7 */
+ emit_add( func, 0, 0 ); /* xmm0 += xmm1 */
+ }
+ emit_rsqrt( func, 1, 0 ); /* xmm1 = 1/sqrt(xmm0) */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ if (chan_index < dims) {
+ emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */
+ STORE( func, *inst, 4+chan_index, 0, chan_index );
+ }
+ }
+ }
break;
case TGSI_OPCODE_DIV:
@@ -2003,7 +2212,16 @@ emit_instruction(
break;
case TGSI_OPCODE_DP2:
- return 0;
+ FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
+ FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
+ emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
+ FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */
+ FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */
+ emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */
+ emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */
+ }
break;
case TGSI_OPCODE_TXL:
@@ -2454,7 +2672,7 @@ tgsi_emit_sse2(
case TGSI_TOKEN_TYPE_IMMEDIATE:
/* simply copy the immediate values into the next immediates[] slot */
{
- const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+ const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
uint i;
assert(size <= 4);
assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 9454563361..58fe07c11d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_text.h"
#include "tgsi_build.h"
#include "tgsi_info.h"
@@ -1023,7 +1023,7 @@ static boolean parse_immediate( struct translate_ctx *ctx )
ctx->cur++;
imm = tgsi_default_full_immediate();
- imm.Immediate.Size += 4;
+ imm.Immediate.NrTokens += 4;
imm.Immediate.DataType = TGSI_IMM_FLOAT32;
imm.u.Pointer = values;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index ea87da31e5..062c1be938 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -31,7 +31,7 @@
* Authors: Brian Paul
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_transform.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 50101a9bb0..71f8a6ca40 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_build.h"
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
index 5c227c1eb5..671e671df2 100644
--- a/src/gallium/auxiliary/util/Makefile
+++ b/src/gallium/auxiliary/util/Makefile
@@ -4,7 +4,7 @@ include $(TOP)/configs/current
LIBNAME = util
C_SOURCES = \
- p_debug.c \
+ u_debug.c \
u_blit.c \
u_cache.c \
u_draw_quad.c \
@@ -12,6 +12,8 @@ C_SOURCES = \
u_handle_table.c \
u_hash_table.c \
u_hash.c \
+ u_keymap.c \
+ u_linear.c \
u_math.c \
u_mm.c \
u_rect.c \
@@ -21,7 +23,8 @@ C_SOURCES = \
u_stream_wd.c \
u_tile.c \
u_time.c \
- u_timed_winsys.c
+ u_timed_winsys.c \
+ u_simple_screen.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
index 1ef06631bf..84e4c48476 100644
--- a/src/gallium/auxiliary/util/SConscript
+++ b/src/gallium/auxiliary/util/SConscript
@@ -3,25 +3,28 @@ Import('*')
util = env.ConvenienceLibrary(
target = 'util',
source = [
- 'p_debug.c',
- 'p_debug_mem.c',
- 'p_debug_prof.c',
'u_blit.c',
'u_cache.c',
+ 'u_debug.c',
+ 'u_debug_memory.c',
+ 'u_debug_profile.c',
'u_draw_quad.c',
'u_gen_mipmap.c',
'u_handle_table.c',
'u_hash.c',
'u_hash_table.c',
+ 'u_keymap.c',
'u_math.c',
'u_mm.c',
'u_rect.c',
'u_simple_shaders.c',
'u_snprintf.c',
- 'u_stream_stdc.c',
- 'u_stream_wd.c',
+ 'u_stream_stdc.c',
+ 'u_stream_wd.c',
'u_tile.c',
'u_time.c',
+ 'u_timed_winsys.c',
+ 'u_simple_screen.c',
])
auxiliaries.insert(0, util)
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 2cef3338b5..efc3a874cc 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -34,10 +34,9 @@
#include "pipe/p_context.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_blit.h"
@@ -415,7 +414,7 @@ util_blit_pixels(struct blit_state *ctx,
memset(&fb, 0, sizeof(fb));
fb.width = dst->width;
fb.height = dst->height;
- fb.num_cbufs = 1;
+ fb.nr_cbufs = 1;
fb.cbufs[0] = dst;
cso_set_framebuffer(ctx->cso, &fb);
@@ -526,7 +525,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
memset(&fb, 0, sizeof(fb));
fb.width = dst->width;
fb.height = dst->height;
- fb.num_cbufs = 1;
+ fb.nr_cbufs = 1;
fb.cbufs[0] = dst;
cso_set_framebuffer(ctx->cso, &fb);
diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c
index 0a1a64259f..41cd38171f 100644
--- a/src/gallium/auxiliary/util/u_cache.c
+++ b/src/gallium/auxiliary/util/u_cache.c
@@ -36,7 +36,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 125f3daf00..43d424b1d6 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -1,6 +1,7 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (c) 2008 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -36,6 +37,13 @@
#include <windows.h>
#include <winddi.h>
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <windows.h>
+#include <types.h>
+
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
#ifndef WIN32_LEAN_AND_MEAN
@@ -51,7 +59,7 @@
#endif
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_format.h"
#include "pipe/p_state.h"
#include "pipe/p_inlines.h"
@@ -98,7 +106,35 @@ void _debug_vprintf(const char *format, va_list ap)
OutputDebugStringA(buf);
buf[0] = '\0';
}
-#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+ wchar_t *wide_format;
+ long wide_str_len;
+ char buf[512];
+ int ret;
+#if (_WIN32_WCE < 600)
+ ret = vsprintf(buf, format, ap);
+ if(ret < 0){
+ sprintf(buf, "Cant handle debug print!");
+ ret = 25;
+ }
+#else
+ ret = vsprintf_s(buf, 512, format, ap);
+ if(ret < 0){
+ sprintf_s(buf, 512, "Cant handle debug print!");
+ ret = 25;
+ }
+#endif
+ buf[ret] = '\0';
+ /* Format is ascii - needs to be converted to wchar_t for printing */
+ wide_str_len = MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, NULL, 0);
+ wide_format = (wchar_t *) malloc((wide_str_len+1) * sizeof(wchar_t));
+ if (wide_format) {
+ MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1,
+ wide_format, wide_str_len);
+ NKDbgPrintfW(wide_format, wide_format);
+ free(wide_format);
+ }
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
/* TODO */
#else /* !PIPE_SUBSYSTEM_WINDOWS */
#ifdef DEBUG
@@ -308,6 +344,13 @@ debug_get_flags_option(const char *name,
str = _debug_get_option(name);
if(!str)
result = dfault;
+ else if (!util_strcmp(str, "help")) {
+ result = dfault;
+ while (flags->name) {
+ debug_printf("%s: help for %s: %s [0x%lx]\n", __FUNCTION__, name, flags->name, flags->value);
+ flags++;
+ }
+ }
else {
result = 0;
while( flags->name ) {
@@ -317,7 +360,12 @@ debug_get_flags_option(const char *name,
}
}
- debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+ if (str) {
+ debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
+ }
+ else {
+ debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+ }
return result;
}
@@ -358,6 +406,32 @@ debug_dump_enum(const struct debug_named_value *names,
const char *
+debug_dump_enum_noprefix(const struct debug_named_value *names,
+ const char *prefix,
+ unsigned long value)
+{
+ static char rest[64];
+
+ while(names->name) {
+ if(names->value == value) {
+ const char *name = names->name;
+ while (*name == *prefix) {
+ name++;
+ prefix++;
+ }
+ return name;
+ }
+ ++names;
+ }
+
+
+
+ util_snprintf(rest, sizeof(rest), "0x%08lx", value);
+ return rest;
+}
+
+
+const char *
debug_dump_flags(const struct debug_named_value *names,
unsigned long value)
{
@@ -486,16 +560,24 @@ static const struct debug_named_value pipe_format_names[] = {
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SSCALED),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SSCALED),
DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_SRGB),
- DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_L8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_A8L8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_A8R8G8B8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_X8R8G8B8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8A8_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8X8_SRGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_X8UB8UG8SR8S_NORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_B6UG5SR5S_NORM),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGB),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA),
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_RGBA),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGB),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGBA),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_SRGBA),
+ DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_SRGBA),
#endif
DEBUG_NAMED_VALUE_END
};
@@ -627,6 +709,7 @@ void
debug_dump_surface_bmp(const char *filename,
struct pipe_surface *surface)
{
+#ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT
struct util_stream *stream;
unsigned surface_usage;
struct bmp_file_header bmfh;
@@ -693,6 +776,7 @@ error2:
FREE(rgba);
error1:
;
+#endif
}
#endif
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
new file mode 100644
index 0000000000..b298b9b66d
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -0,0 +1,361 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Cross-platform debugging helpers.
+ *
+ * For now it just has assert and printf replacements, but it might be extended
+ * with stack trace reports and more advanced logging in the near future.
+ *
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#ifndef U_DEBUG_H_
+#define U_DEBUG_H_
+
+
+#include <stdarg.h>
+
+#include "pipe/p_compiler.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#if defined(DBG) || defined(DEBUG)
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+#else
+#ifndef NDEBUG
+#define NDEBUG 1
+#endif
+#endif
+
+
+/* MSVC bebore VC7 does not have the __FUNCTION__ macro */
+#if defined(_MSC_VER) && _MSC_VER < 1300
+#define __FUNCTION__ "???"
+#endif
+
+
+void _debug_vprintf(const char *format, va_list ap);
+
+
+static INLINE void
+_debug_printf(const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ _debug_vprintf(format, ap);
+ va_end(ap);
+}
+
+
+/**
+ * Print debug messages.
+ *
+ * The actual channel used to output debug message is platform specific. To
+ * avoid misformating or truncation, follow these rules of thumb:
+ * - output whole lines
+ * - avoid outputing large strings (512 bytes is the current maximum length
+ * that is guaranteed to be printed in all platforms)
+ */
+static INLINE void
+debug_printf(const char *format, ...)
+{
+#ifdef DEBUG
+ va_list ap;
+ va_start(ap, format);
+ _debug_vprintf(format, ap);
+ va_end(ap);
+#else
+ (void) format; /* silence warning */
+#endif
+}
+
+
+#ifdef DEBUG
+#define debug_vprintf(_format, _ap) _debug_vprintf(_format, _ap)
+#else
+#define debug_vprintf(_format, _ap) ((void)0)
+#endif
+
+
+#ifdef DEBUG
+/**
+ * Dump a blob in hex to the same place that debug_printf sends its
+ * messages.
+ */
+void debug_print_blob( const char *name, const void *blob, unsigned size );
+
+/* Print a message along with a prettified format string
+ */
+void debug_print_format(const char *msg, unsigned fmt );
+#else
+#define debug_print_blob(_name, _blob, _size) ((void)0)
+#define debug_print_format(_msg, _fmt) ((void)0)
+#endif
+
+
+void _debug_break(void);
+
+
+/**
+ * Hard-coded breakpoint.
+ */
+#ifdef DEBUG
+#if defined(PIPE_ARCH_X86) && defined(PIPE_CC_GCC)
+#define debug_break() __asm("int3")
+#elif defined(PIPE_ARCH_X86) && defined(PIPE_CC_MSVC)
+#define debug_break() do { _asm {int 3} } while(0)
+#else
+#define debug_break() _debug_break()
+#endif
+#else /* !DEBUG */
+#define debug_break() ((void)0)
+#endif /* !DEBUG */
+
+
+long
+debug_get_num_option(const char *name, long dfault);
+
+void _debug_assert_fail(const char *expr,
+ const char *file,
+ unsigned line,
+ const char *function);
+
+
+/**
+ * Assert macro
+ *
+ * Do not expect that the assert call terminates -- errors must be handled
+ * regardless of assert behavior.
+ */
+#ifdef DEBUG
+#define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__))
+#else
+#define debug_assert(expr) ((void)0)
+#endif
+
+
+/** Override standard assert macro */
+#ifdef assert
+#undef assert
+#endif
+#define assert(expr) debug_assert(expr)
+
+
+/**
+ * Output the current function name.
+ */
+#ifdef DEBUG
+#define debug_checkpoint() \
+ _debug_printf("%s\n", __FUNCTION__)
+#else
+#define debug_checkpoint() \
+ ((void)0)
+#endif
+
+
+/**
+ * Output the full source code position.
+ */
+#ifdef DEBUG
+#define debug_checkpoint_full() \
+ _debug_printf("%s:%u:%s", __FILE__, __LINE__, __FUNCTION__)
+#else
+#define debug_checkpoint_full() \
+ ((void)0)
+#endif
+
+
+/**
+ * Output a warning message. Muted on release version.
+ */
+#ifdef DEBUG
+#define debug_warning(__msg) \
+ _debug_printf("%s:%u:%s: warning: %s\n", __FILE__, __LINE__, __FUNCTION__, __msg)
+#else
+#define debug_warning(__msg) \
+ ((void)0)
+#endif
+
+
+/**
+ * Output an error message. Not muted on release version.
+ */
+#ifdef DEBUG
+#define debug_error(__msg) \
+ _debug_printf("%s:%u:%s: error: %s\n", __FILE__, __LINE__, __FUNCTION__, __msg)
+#else
+#define debug_error(__msg) \
+ _debug_printf("error: %s\n", __msg)
+#endif
+
+
+/**
+ * Used by debug_dump_enum and debug_dump_flags to describe symbols.
+ */
+struct debug_named_value
+{
+ const char *name;
+ unsigned long value;
+};
+
+
+/**
+ * Some C pre-processor magic to simplify creating named values.
+ *
+ * Example:
+ * @code
+ * static const debug_named_value my_names[] = {
+ * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_X),
+ * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Y),
+ * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Z),
+ * DEBUG_NAMED_VALUE_END
+ * };
+ *
+ * ...
+ * debug_printf("%s = %s\n",
+ * name,
+ * debug_dump_enum(my_names, my_value));
+ * ...
+ * @endcode
+ */
+#define DEBUG_NAMED_VALUE(__symbol) {#__symbol, (unsigned long)__symbol}
+#define DEBUG_NAMED_VALUE_END {NULL, 0}
+
+
+/**
+ * Convert a enum value to a string.
+ */
+const char *
+debug_dump_enum(const struct debug_named_value *names,
+ unsigned long value);
+
+const char *
+debug_dump_enum_noprefix(const struct debug_named_value *names,
+ const char *prefix,
+ unsigned long value);
+
+
+/**
+ * Convert binary flags value to a string.
+ */
+const char *
+debug_dump_flags(const struct debug_named_value *names,
+ unsigned long value);
+
+
+/**
+ * Get option.
+ *
+ * It is an alias for getenv on Linux.
+ *
+ * On Windows it reads C:\gallium.cfg, which is a text file with CR+LF line
+ * endings with one option per line as
+ *
+ * NAME=value
+ *
+ * This file must be terminated with an extra empty line.
+ */
+const char *
+debug_get_option(const char *name, const char *dfault);
+
+boolean
+debug_get_bool_option(const char *name, boolean dfault);
+
+long
+debug_get_num_option(const char *name, long dfault);
+
+unsigned long
+debug_get_flags_option(const char *name,
+ const struct debug_named_value *flags,
+ unsigned long dfault);
+
+
+void *
+debug_malloc(const char *file, unsigned line, const char *function,
+ size_t size);
+
+void
+debug_free(const char *file, unsigned line, const char *function,
+ void *ptr);
+
+void *
+debug_calloc(const char *file, unsigned line, const char *function,
+ size_t count, size_t size );
+
+void *
+debug_realloc(const char *file, unsigned line, const char *function,
+ void *old_ptr, size_t old_size, size_t new_size );
+
+unsigned long
+debug_memory_begin(void);
+
+void
+debug_memory_end(unsigned long beginning);
+
+
+#if defined(PROFILE) && defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
+
+void
+debug_profile_start(void);
+
+void
+debug_profile_stop(void);
+
+#endif
+
+
+#ifdef DEBUG
+struct pipe_surface;
+void debug_dump_image(const char *prefix,
+ unsigned format, unsigned cpp,
+ unsigned width, unsigned height,
+ unsigned stride,
+ const void *data);
+void debug_dump_surface(const char *prefix,
+ struct pipe_surface *surface);
+void debug_dump_surface_bmp(const char *filename,
+ struct pipe_surface *surface);
+#else
+#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
+#define debug_dump_surface(prefix, surface) ((void)0)
+#define debug_dump_surface_bmp(filename, surface) ((void)0)
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_DEBUG_H_ */
diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/u_debug_memory.c
index 250fd60f63..f6c136f6e5 100644
--- a/src/gallium/auxiliary/util/p_debug_mem.c
+++ b/src/gallium/auxiliary/util/u_debug_memory.c
@@ -44,7 +44,7 @@
#include <stdlib.h>
#endif
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_double_list.h"
diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/u_debug_profile.c
index 5f9772ef91..5f9772ef91 100644
--- a/src/gallium/auxiliary/util/p_debug_prof.c
+++ b/src/gallium/auxiliary/util/u_debug_profile.c
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index d7bb74b87b..f282f3d289 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -29,7 +29,6 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
#include "util/u_draw_quad.h"
@@ -53,7 +52,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
/* tell pipe about the vertex buffer */
vbuffer.buffer = vbuf;
- vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */
+ vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */
vbuffer.buffer_offset = offset;
pipe->set_vertex_buffers(pipe, 1, &vbuffer);
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index b0de5968e9..7c20c7ce7b 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -2,6 +2,7 @@
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -34,10 +35,9 @@
#include "pipe/p_context.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_memory.h"
@@ -93,13 +93,82 @@ enum dtype
typedef ushort half_float;
-#if 0
-extern half_float
-float_to_half(float f);
+static half_float
+float_to_half(float f)
+{
+ /* XXX fix this */
+ return 0;
+}
+
+static float
+half_to_float(half_float h)
+{
+ /* XXX fix this */
+ return 0.0f;
+}
+
-extern float
-half_to_float(half_float h);
-#endif
+
+
+/**
+ * \name Support macros for do_row and do_row_3d
+ *
+ * The macro madness is here for two reasons. First, it compacts the code
+ * slightly. Second, it makes it much easier to adjust the specifics of the
+ * filter to tune the rounding characteristics.
+ */
+/*@{*/
+#define DECLARE_ROW_POINTERS(t, e) \
+ const t(*rowA)[e] = (const t(*)[e]) srcRowA; \
+ const t(*rowB)[e] = (const t(*)[e]) srcRowB; \
+ const t(*rowC)[e] = (const t(*)[e]) srcRowC; \
+ const t(*rowD)[e] = (const t(*)[e]) srcRowD; \
+ t(*dst)[e] = (t(*)[e]) dstRow
+
+#define DECLARE_ROW_POINTERS0(t) \
+ const t *rowA = (const t *) srcRowA; \
+ const t *rowB = (const t *) srcRowB; \
+ const t *rowC = (const t *) srcRowC; \
+ const t *rowD = (const t *) srcRowD; \
+ t *dst = (t *) dstRow
+
+#define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \
+ ((unsigned) Aj + (unsigned) Ak \
+ + (unsigned) Bj + (unsigned) Bk \
+ + (unsigned) Cj + (unsigned) Ck \
+ + (unsigned) Dj + (unsigned) Dk \
+ + 4) >> 3
+
+#define FILTER_3D(e) \
+ do { \
+ dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \
+ rowB[j][e], rowB[k][e], \
+ rowC[j][e], rowC[k][e], \
+ rowD[j][e], rowD[k][e]); \
+ } while(0)
+
+#define FILTER_F_3D(e) \
+ do { \
+ dst[i][e] = (rowA[j][e] + rowA[k][e] \
+ + rowB[j][e] + rowB[k][e] \
+ + rowC[j][e] + rowC[k][e] \
+ + rowD[j][e] + rowD[k][e]) * 0.125F; \
+ } while(0)
+
+#define FILTER_HF_3D(e) \
+ do { \
+ const float aj = half_to_float(rowA[j][e]); \
+ const float ak = half_to_float(rowA[k][e]); \
+ const float bj = half_to_float(rowB[j][e]); \
+ const float bk = half_to_float(rowB[k][e]); \
+ const float cj = half_to_float(rowC[j][e]); \
+ const float ck = half_to_float(rowC[k][e]); \
+ const float dj = half_to_float(rowD[j][e]); \
+ const float dk = half_to_float(rowD[k][e]); \
+ dst[i][e] = float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \
+ * 0.125F); \
+ } while(0)
+/*@}*/
/**
@@ -471,6 +540,385 @@ do_row(enum dtype datatype, uint comps, int srcWidth,
}
+/**
+ * Average together four rows of a source image to produce a single new
+ * row in the dest image. It's legal for the two source rows to point
+ * to the same data. The source width must be equal to either the
+ * dest width or two times the dest width.
+ *
+ * \param datatype GL pixel type \c GL_UNSIGNED_BYTE, \c GL_UNSIGNED_SHORT,
+ * \c GL_FLOAT, etc.
+ * \param comps number of components per pixel (1..4)
+ * \param srcWidth Width of a row in the source data
+ * \param srcRowA Pointer to one of the rows of source data
+ * \param srcRowB Pointer to one of the rows of source data
+ * \param srcRowC Pointer to one of the rows of source data
+ * \param srcRowD Pointer to one of the rows of source data
+ * \param dstWidth Width of a row in the destination data
+ * \param srcRowA Pointer to the row of destination data
+ */
+static void
+do_row_3D(enum dtype datatype, uint comps, int srcWidth,
+ const void *srcRowA, const void *srcRowB,
+ const void *srcRowC, const void *srcRowD,
+ int dstWidth, void *dstRow)
+{
+ const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
+ const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
+ uint i, j, k;
+
+ assert(comps >= 1);
+ assert(comps <= 4);
+
+ if ((datatype == UBYTE) && (comps == 4)) {
+ DECLARE_ROW_POINTERS(ubyte, 4);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_3D(0);
+ FILTER_3D(1);
+ FILTER_3D(2);
+ FILTER_3D(3);
+ }
+ }
+ else if ((datatype == UBYTE) && (comps == 3)) {
+ DECLARE_ROW_POINTERS(ubyte, 3);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_3D(0);
+ FILTER_3D(1);
+ FILTER_3D(2);
+ }
+ }
+ else if ((datatype == UBYTE) && (comps == 2)) {
+ DECLARE_ROW_POINTERS(ubyte, 2);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_3D(0);
+ FILTER_3D(1);
+ }
+ }
+ else if ((datatype == UBYTE) && (comps == 1)) {
+ DECLARE_ROW_POINTERS(ubyte, 1);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_3D(0);
+ }
+ }
+ else if ((datatype == USHORT) && (comps == 4)) {
+ DECLARE_ROW_POINTERS(ushort, 4);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_3D(0);
+ FILTER_3D(1);
+ FILTER_3D(2);
+ FILTER_3D(3);
+ }
+ }
+ else if ((datatype == USHORT) && (comps == 3)) {
+ DECLARE_ROW_POINTERS(ushort, 3);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_3D(0);
+ FILTER_3D(1);
+ FILTER_3D(2);
+ }
+ }
+ else if ((datatype == USHORT) && (comps == 2)) {
+ DECLARE_ROW_POINTERS(ushort, 2);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_3D(0);
+ FILTER_3D(1);
+ }
+ }
+ else if ((datatype == USHORT) && (comps == 1)) {
+ DECLARE_ROW_POINTERS(ushort, 1);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_3D(0);
+ }
+ }
+ else if ((datatype == FLOAT) && (comps == 4)) {
+ DECLARE_ROW_POINTERS(float, 4);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_F_3D(0);
+ FILTER_F_3D(1);
+ FILTER_F_3D(2);
+ FILTER_F_3D(3);
+ }
+ }
+ else if ((datatype == FLOAT) && (comps == 3)) {
+ DECLARE_ROW_POINTERS(float, 3);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_F_3D(0);
+ FILTER_F_3D(1);
+ FILTER_F_3D(2);
+ }
+ }
+ else if ((datatype == FLOAT) && (comps == 2)) {
+ DECLARE_ROW_POINTERS(float, 2);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_F_3D(0);
+ FILTER_F_3D(1);
+ }
+ }
+ else if ((datatype == FLOAT) && (comps == 1)) {
+ DECLARE_ROW_POINTERS(float, 1);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_F_3D(0);
+ }
+ }
+ else if ((datatype == HALF_FLOAT) && (comps == 4)) {
+ DECLARE_ROW_POINTERS(half_float, 4);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_HF_3D(0);
+ FILTER_HF_3D(1);
+ FILTER_HF_3D(2);
+ FILTER_HF_3D(3);
+ }
+ }
+ else if ((datatype == HALF_FLOAT) && (comps == 3)) {
+ DECLARE_ROW_POINTERS(half_float, 4);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_HF_3D(0);
+ FILTER_HF_3D(1);
+ FILTER_HF_3D(2);
+ }
+ }
+ else if ((datatype == HALF_FLOAT) && (comps == 2)) {
+ DECLARE_ROW_POINTERS(half_float, 4);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_HF_3D(0);
+ FILTER_HF_3D(1);
+ }
+ }
+ else if ((datatype == HALF_FLOAT) && (comps == 1)) {
+ DECLARE_ROW_POINTERS(half_float, 4);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ FILTER_HF_3D(0);
+ }
+ }
+ else if ((datatype == UINT) && (comps == 1)) {
+ const uint *rowA = (const uint *) srcRowA;
+ const uint *rowB = (const uint *) srcRowB;
+ const uint *rowC = (const uint *) srcRowC;
+ const uint *rowD = (const uint *) srcRowD;
+ float *dst = (float *) dstRow;
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k])
+ + ((uint64_t) rowB[j] + (uint64_t) rowB[k])
+ + ((uint64_t) rowC[j] + (uint64_t) rowC[k])
+ + ((uint64_t) rowD[j] + (uint64_t) rowD[k]));
+ dst[i] = (float)((double) tmp * 0.125);
+ }
+ }
+ else if ((datatype == USHORT_5_6_5) && (comps == 3)) {
+ DECLARE_ROW_POINTERS0(ushort);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ const int rowAr0 = rowA[j] & 0x1f;
+ const int rowAr1 = rowA[k] & 0x1f;
+ const int rowBr0 = rowB[j] & 0x1f;
+ const int rowBr1 = rowB[k] & 0x1f;
+ const int rowCr0 = rowC[j] & 0x1f;
+ const int rowCr1 = rowC[k] & 0x1f;
+ const int rowDr0 = rowD[j] & 0x1f;
+ const int rowDr1 = rowD[k] & 0x1f;
+ const int rowAg0 = (rowA[j] >> 5) & 0x3f;
+ const int rowAg1 = (rowA[k] >> 5) & 0x3f;
+ const int rowBg0 = (rowB[j] >> 5) & 0x3f;
+ const int rowBg1 = (rowB[k] >> 5) & 0x3f;
+ const int rowCg0 = (rowC[j] >> 5) & 0x3f;
+ const int rowCg1 = (rowC[k] >> 5) & 0x3f;
+ const int rowDg0 = (rowD[j] >> 5) & 0x3f;
+ const int rowDg1 = (rowD[k] >> 5) & 0x3f;
+ const int rowAb0 = (rowA[j] >> 11) & 0x1f;
+ const int rowAb1 = (rowA[k] >> 11) & 0x1f;
+ const int rowBb0 = (rowB[j] >> 11) & 0x1f;
+ const int rowBb1 = (rowB[k] >> 11) & 0x1f;
+ const int rowCb0 = (rowC[j] >> 11) & 0x1f;
+ const int rowCb1 = (rowC[k] >> 11) & 0x1f;
+ const int rowDb0 = (rowD[j] >> 11) & 0x1f;
+ const int rowDb1 = (rowD[k] >> 11) & 0x1f;
+ const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+ rowCr0, rowCr1, rowDr0, rowDr1);
+ const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+ rowCg0, rowCg1, rowDg0, rowDg1);
+ const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+ rowCb0, rowCb1, rowDb0, rowDb1);
+ dst[i] = (b << 11) | (g << 5) | r;
+ }
+ }
+ else if ((datatype == USHORT_4_4_4_4) && (comps == 4)) {
+ DECLARE_ROW_POINTERS0(ushort);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ const int rowAr0 = rowA[j] & 0xf;
+ const int rowAr1 = rowA[k] & 0xf;
+ const int rowBr0 = rowB[j] & 0xf;
+ const int rowBr1 = rowB[k] & 0xf;
+ const int rowCr0 = rowC[j] & 0xf;
+ const int rowCr1 = rowC[k] & 0xf;
+ const int rowDr0 = rowD[j] & 0xf;
+ const int rowDr1 = rowD[k] & 0xf;
+ const int rowAg0 = (rowA[j] >> 4) & 0xf;
+ const int rowAg1 = (rowA[k] >> 4) & 0xf;
+ const int rowBg0 = (rowB[j] >> 4) & 0xf;
+ const int rowBg1 = (rowB[k] >> 4) & 0xf;
+ const int rowCg0 = (rowC[j] >> 4) & 0xf;
+ const int rowCg1 = (rowC[k] >> 4) & 0xf;
+ const int rowDg0 = (rowD[j] >> 4) & 0xf;
+ const int rowDg1 = (rowD[k] >> 4) & 0xf;
+ const int rowAb0 = (rowA[j] >> 8) & 0xf;
+ const int rowAb1 = (rowA[k] >> 8) & 0xf;
+ const int rowBb0 = (rowB[j] >> 8) & 0xf;
+ const int rowBb1 = (rowB[k] >> 8) & 0xf;
+ const int rowCb0 = (rowC[j] >> 8) & 0xf;
+ const int rowCb1 = (rowC[k] >> 8) & 0xf;
+ const int rowDb0 = (rowD[j] >> 8) & 0xf;
+ const int rowDb1 = (rowD[k] >> 8) & 0xf;
+ const int rowAa0 = (rowA[j] >> 12) & 0xf;
+ const int rowAa1 = (rowA[k] >> 12) & 0xf;
+ const int rowBa0 = (rowB[j] >> 12) & 0xf;
+ const int rowBa1 = (rowB[k] >> 12) & 0xf;
+ const int rowCa0 = (rowC[j] >> 12) & 0xf;
+ const int rowCa1 = (rowC[k] >> 12) & 0xf;
+ const int rowDa0 = (rowD[j] >> 12) & 0xf;
+ const int rowDa1 = (rowD[k] >> 12) & 0xf;
+ const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+ rowCr0, rowCr1, rowDr0, rowDr1);
+ const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+ rowCg0, rowCg1, rowDg0, rowDg1);
+ const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+ rowCb0, rowCb1, rowDb0, rowDb1);
+ const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
+ rowCa0, rowCa1, rowDa0, rowDa1);
+
+ dst[i] = (a << 12) | (b << 8) | (g << 4) | r;
+ }
+ }
+ else if ((datatype == USHORT_1_5_5_5_REV) && (comps == 4)) {
+ DECLARE_ROW_POINTERS0(ushort);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ const int rowAr0 = rowA[j] & 0x1f;
+ const int rowAr1 = rowA[k] & 0x1f;
+ const int rowBr0 = rowB[j] & 0x1f;
+ const int rowBr1 = rowB[k] & 0x1f;
+ const int rowCr0 = rowC[j] & 0x1f;
+ const int rowCr1 = rowC[k] & 0x1f;
+ const int rowDr0 = rowD[j] & 0x1f;
+ const int rowDr1 = rowD[k] & 0x1f;
+ const int rowAg0 = (rowA[j] >> 5) & 0x1f;
+ const int rowAg1 = (rowA[k] >> 5) & 0x1f;
+ const int rowBg0 = (rowB[j] >> 5) & 0x1f;
+ const int rowBg1 = (rowB[k] >> 5) & 0x1f;
+ const int rowCg0 = (rowC[j] >> 5) & 0x1f;
+ const int rowCg1 = (rowC[k] >> 5) & 0x1f;
+ const int rowDg0 = (rowD[j] >> 5) & 0x1f;
+ const int rowDg1 = (rowD[k] >> 5) & 0x1f;
+ const int rowAb0 = (rowA[j] >> 10) & 0x1f;
+ const int rowAb1 = (rowA[k] >> 10) & 0x1f;
+ const int rowBb0 = (rowB[j] >> 10) & 0x1f;
+ const int rowBb1 = (rowB[k] >> 10) & 0x1f;
+ const int rowCb0 = (rowC[j] >> 10) & 0x1f;
+ const int rowCb1 = (rowC[k] >> 10) & 0x1f;
+ const int rowDb0 = (rowD[j] >> 10) & 0x1f;
+ const int rowDb1 = (rowD[k] >> 10) & 0x1f;
+ const int rowAa0 = (rowA[j] >> 15) & 0x1;
+ const int rowAa1 = (rowA[k] >> 15) & 0x1;
+ const int rowBa0 = (rowB[j] >> 15) & 0x1;
+ const int rowBa1 = (rowB[k] >> 15) & 0x1;
+ const int rowCa0 = (rowC[j] >> 15) & 0x1;
+ const int rowCa1 = (rowC[k] >> 15) & 0x1;
+ const int rowDa0 = (rowD[j] >> 15) & 0x1;
+ const int rowDa1 = (rowD[k] >> 15) & 0x1;
+ const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+ rowCr0, rowCr1, rowDr0, rowDr1);
+ const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+ rowCg0, rowCg1, rowDg0, rowDg1);
+ const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+ rowCb0, rowCb1, rowDb0, rowDb1);
+ const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
+ rowCa0, rowCa1, rowDa0, rowDa1);
+
+ dst[i] = (a << 15) | (b << 10) | (g << 5) | r;
+ }
+ }
+ else if ((datatype == UBYTE_3_3_2) && (comps == 3)) {
+ DECLARE_ROW_POINTERS0(ushort);
+
+ for (i = j = 0, k = k0; i < (uint) dstWidth;
+ i++, j += colStride, k += colStride) {
+ const int rowAr0 = rowA[j] & 0x3;
+ const int rowAr1 = rowA[k] & 0x3;
+ const int rowBr0 = rowB[j] & 0x3;
+ const int rowBr1 = rowB[k] & 0x3;
+ const int rowCr0 = rowC[j] & 0x3;
+ const int rowCr1 = rowC[k] & 0x3;
+ const int rowDr0 = rowD[j] & 0x3;
+ const int rowDr1 = rowD[k] & 0x3;
+ const int rowAg0 = (rowA[j] >> 2) & 0x7;
+ const int rowAg1 = (rowA[k] >> 2) & 0x7;
+ const int rowBg0 = (rowB[j] >> 2) & 0x7;
+ const int rowBg1 = (rowB[k] >> 2) & 0x7;
+ const int rowCg0 = (rowC[j] >> 2) & 0x7;
+ const int rowCg1 = (rowC[k] >> 2) & 0x7;
+ const int rowDg0 = (rowD[j] >> 2) & 0x7;
+ const int rowDg1 = (rowD[k] >> 2) & 0x7;
+ const int rowAb0 = (rowA[j] >> 5) & 0x7;
+ const int rowAb1 = (rowA[k] >> 5) & 0x7;
+ const int rowBb0 = (rowB[j] >> 5) & 0x7;
+ const int rowBb1 = (rowB[k] >> 5) & 0x7;
+ const int rowCb0 = (rowC[j] >> 5) & 0x7;
+ const int rowCb1 = (rowC[k] >> 5) & 0x7;
+ const int rowDb0 = (rowD[j] >> 5) & 0x7;
+ const int rowDb1 = (rowD[k] >> 5) & 0x7;
+ const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+ rowCr0, rowCr1, rowDr0, rowDr1);
+ const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+ rowCg0, rowCg1, rowDg0, rowDg1);
+ const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+ rowCb0, rowCb1, rowDb0, rowDb1);
+ dst[i] = (b << 5) | (g << 2) | r;
+ }
+ }
+ else {
+ debug_printf("bad format in do_row_3D()");
+ }
+}
+
+
+
static void
format_to_type_comps(enum pipe_format pformat,
enum dtype *datatype, uint *comps)
@@ -576,6 +1024,87 @@ reduce_2d(enum pipe_format pformat,
static void
+reduce_3d(enum pipe_format pformat,
+ int srcWidth, int srcHeight, int srcDepth,
+ int srcRowStride, const ubyte *srcPtr,
+ int dstWidth, int dstHeight, int dstDepth,
+ int dstRowStride, ubyte *dstPtr)
+{
+ const int bpt = pf_get_size(pformat);
+ const int border = 0;
+ int img, row;
+ int bytesPerSrcImage, bytesPerDstImage;
+ int bytesPerSrcRow, bytesPerDstRow;
+ int srcImageOffset, srcRowOffset;
+ enum dtype datatype;
+ uint comps;
+
+ format_to_type_comps(pformat, &datatype, &comps);
+
+ bytesPerSrcImage = srcWidth * srcHeight * bpt;
+ bytesPerDstImage = dstWidth * dstHeight * bpt;
+
+ bytesPerSrcRow = srcWidth * bpt;
+ bytesPerDstRow = dstWidth * bpt;
+
+ /* Offset between adjacent src images to be averaged together */
+ srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage;
+
+ /* Offset between adjacent src rows to be averaged together */
+ srcRowOffset = (srcHeight == dstHeight) ? 0 : srcWidth * bpt;
+
+ /*
+ * Need to average together up to 8 src pixels for each dest pixel.
+ * Break that down into 3 operations:
+ * 1. take two rows from source image and average them together.
+ * 2. take two rows from next source image and average them together.
+ * 3. take the two averaged rows and average them for the final dst row.
+ */
+
+ /*
+ _mesa_printf("mip3d %d x %d x %d -> %d x %d x %d\n",
+ srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth);
+ */
+
+ for (img = 0; img < dstDepth; img++) {
+ /* first source image pointer, skipping border */
+ const ubyte *imgSrcA = srcPtr
+ + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border
+ + img * (bytesPerSrcImage + srcImageOffset);
+ /* second source image pointer, skipping border */
+ const ubyte *imgSrcB = imgSrcA + srcImageOffset;
+ /* address of the dest image, skipping border */
+ ubyte *imgDst = dstPtr
+ + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border
+ + img * bytesPerDstImage;
+
+ /* setup the four source row pointers and the dest row pointer */
+ const ubyte *srcImgARowA = imgSrcA;
+ const ubyte *srcImgARowB = imgSrcA + srcRowOffset;
+ const ubyte *srcImgBRowA = imgSrcB;
+ const ubyte *srcImgBRowB = imgSrcB + srcRowOffset;
+ ubyte *dstImgRow = imgDst;
+
+ for (row = 0; row < dstHeight; row++) {
+ do_row_3D(datatype, comps, srcWidth,
+ srcImgARowA, srcImgARowB,
+ srcImgBRowA, srcImgBRowB,
+ dstWidth, dstImgRow);
+
+ /* advance to next rows */
+ srcImgARowA += bytesPerSrcRow + srcRowOffset;
+ srcImgARowB += bytesPerSrcRow + srcRowOffset;
+ srcImgBRowA += bytesPerSrcRow + srcRowOffset;
+ srcImgBRowB += bytesPerSrcRow + srcRowOffset;
+ dstImgRow += bytesPerDstRow;
+ }
+ }
+}
+
+
+
+
+static void
make_1d_mipmap(struct gen_mipmap_state *ctx,
struct pipe_texture *pt,
uint face, uint baseLevel, uint lastLevel)
@@ -596,19 +1125,19 @@ make_1d_mipmap(struct gen_mipmap_state *ctx,
dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
PIPE_BUFFER_USAGE_CPU_WRITE);
- srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer,
- PIPE_BUFFER_USAGE_CPU_READ)
+ srcMap = ((ubyte *) pipe_surface_map(srcSurf,
+ PIPE_BUFFER_USAGE_CPU_READ)
+ srcSurf->offset);
- dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE)
+ dstMap = ((ubyte *) pipe_surface_map(dstSurf,
+ PIPE_BUFFER_USAGE_CPU_WRITE)
+ dstSurf->offset);
reduce_1d(pt->format,
srcSurf->width, srcMap,
dstSurf->width, dstMap);
- pipe_buffer_unmap(screen, srcSurf->buffer);
- pipe_buffer_unmap(screen, dstSurf->buffer);
+ pipe_surface_unmap(srcSurf);
+ pipe_surface_unmap(dstSurf);
pipe_surface_reference(&srcSurf, NULL);
pipe_surface_reference(&dstSurf, NULL);
@@ -639,11 +1168,11 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
PIPE_BUFFER_USAGE_CPU_WRITE);
- srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer,
- PIPE_BUFFER_USAGE_CPU_READ)
+ srcMap = ((ubyte *) pipe_surface_map(srcSurf,
+ PIPE_BUFFER_USAGE_CPU_READ)
+ srcSurf->offset);
- dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE)
+ dstMap = ((ubyte *) pipe_surface_map(dstSurf,
+ PIPE_BUFFER_USAGE_CPU_WRITE)
+ dstSurf->offset);
reduce_2d(pt->format,
@@ -652,8 +1181,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
dstSurf->width, dstSurf->height,
dstSurf->stride, dstMap);
- pipe_buffer_unmap(screen, srcSurf->buffer);
- pipe_buffer_unmap(screen, dstSurf->buffer);
+ pipe_surface_unmap(srcSurf);
+ pipe_surface_unmap(dstSurf);
pipe_surface_reference(&srcSurf, NULL);
pipe_surface_reference(&dstSurf, NULL);
@@ -666,6 +1195,46 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
struct pipe_texture *pt,
uint face, uint baseLevel, uint lastLevel)
{
+ struct pipe_context *pipe = ctx->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ uint dstLevel, zslice = 0;
+
+ assert(pt->block.width == 1);
+ assert(pt->block.height == 1);
+
+ for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+ const uint srcLevel = dstLevel - 1;
+ struct pipe_surface *srcSurf, *dstSurf;
+ ubyte *srcMap, *dstMap;
+
+ srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+
+ srcMap = ((ubyte *) pipe_surface_map(srcSurf,
+ PIPE_BUFFER_USAGE_CPU_READ)
+ + srcSurf->offset);
+ dstMap = ((ubyte *) pipe_surface_map(dstSurf,
+ PIPE_BUFFER_USAGE_CPU_WRITE)
+ + dstSurf->offset);
+
+#if 0
+ reduce_3d(pt->format,
+ srcSurf->width, srcSurf->height,
+ srcSurf->stride, srcMap,
+ dstSurf->width, dstSurf->height,
+ dstSurf->stride, dstMap);
+#else
+ (void) reduce_3d;
+#endif
+
+ pipe_surface_unmap(srcSurf);
+ pipe_surface_unmap(dstSurf);
+
+ pipe_surface_reference(&srcSurf, NULL);
+ pipe_surface_reference(&dstSurf, NULL);
+ }
}
@@ -920,7 +1489,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
/* init framebuffer state */
memset(&fb, 0, sizeof(fb));
- fb.num_cbufs = 1;
+ fb.nr_cbufs = 1;
/* set min/mag to same filter for faster sw speed */
ctx->sampler.mag_img_filter = filter;
diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c
index 2d15932ce3..6da7353e25 100644
--- a/src/gallium/auxiliary/util/u_handle_table.c
+++ b/src/gallium/auxiliary/util/u_handle_table.c
@@ -34,7 +34,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_handle_table.h"
diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c
index 0bc8de9632..2f83e318e4 100644
--- a/src/gallium/auxiliary/util/u_hash_table.c
+++ b/src/gallium/auxiliary/util/u_hash_table.c
@@ -39,7 +39,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "cso_cache/cso_hash.h"
diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c
new file mode 100644
index 0000000000..3f70809efd
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_keymap.c
@@ -0,0 +1,309 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Key lookup/associative container.
+ *
+ * Like Jose's u_hash_table, based on CSO cache code for now.
+ *
+ * Author: Brian Paul
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "pipe/p_error.h"
+
+#include "cso_cache/cso_hash.h"
+
+#include "util/u_memory.h"
+#include "util/u_keymap.h"
+
+
+struct keymap
+{
+ struct cso_hash *cso;
+ unsigned key_size;
+ unsigned max_entries; /* XXX not obeyed net */
+ unsigned num_entries;
+ keymap_delete_func delete_func;
+};
+
+
+struct keymap_item
+{
+ void *key, *value;
+};
+
+
+/**
+ * This the default key-delete function used when the client doesn't
+ * provide one.
+ */
+static void
+default_delete_func(const struct keymap *map,
+ const void *key, void *data, void *user)
+{
+ FREE((void*) data);
+}
+
+
+static INLINE struct keymap_item *
+hash_table_item(struct cso_hash_iter iter)
+{
+ return (struct keymap_item *) cso_hash_iter_data(iter);
+}
+
+
+/**
+ * Return 4-byte hash key for a block of bytes.
+ */
+static unsigned
+hash(const void *key, unsigned keySize)
+{
+ unsigned i, hash;
+
+ keySize /= 4; /* convert from bytes to uints */
+
+ hash = 0;
+ for (i = 0; i < keySize; i++) {
+ hash ^= (i + 1) * ((const unsigned *) key)[i];
+ }
+
+ /*hash = hash ^ (hash >> 11) ^ (hash >> 22);*/
+
+ return hash;
+}
+
+
+/**
+ * Create a new map.
+ * \param keySize size of the keys in bytes
+ * \param maxEntries max number of entries to allow (~0 = infinity)
+ * \param deleteFunc optional callback to call when entries
+ * are deleted/replaced
+ */
+struct keymap *
+util_new_keymap(unsigned keySize, unsigned maxEntries,
+ keymap_delete_func deleteFunc)
+{
+ struct keymap *map = MALLOC_STRUCT(keymap);
+ if (!map)
+ return NULL;
+
+ map->cso = cso_hash_create();
+ if (!map->cso) {
+ FREE(map);
+ return NULL;
+ }
+
+ map->max_entries = maxEntries;
+ map->num_entries = 0;
+ map->key_size = keySize;
+ map->delete_func = deleteFunc ? deleteFunc : default_delete_func;
+
+ return map;
+}
+
+
+/**
+ * Delete/free a keymap and all entries. The deleteFunc that was given at
+ * create time will be called for each entry.
+ * \param user user-provided pointer passed through to the delete callback
+ */
+void
+util_delete_keymap(struct keymap *map, void *user)
+{
+ util_keymap_remove_all(map, user);
+ cso_hash_delete(map->cso);
+ FREE(map);
+}
+
+
+static INLINE struct cso_hash_iter
+hash_table_find_iter(const struct keymap *map, const void *key,
+ unsigned key_hash)
+{
+ struct cso_hash_iter iter;
+ struct keymap_item *item;
+
+ iter = cso_hash_find(map->cso, key_hash);
+ while (!cso_hash_iter_is_null(iter)) {
+ item = (struct keymap_item *) cso_hash_iter_data(iter);
+ if (!memcmp(item->key, key, map->key_size))
+ break;
+ iter = cso_hash_iter_next(iter);
+ }
+
+ return iter;
+}
+
+
+static INLINE struct keymap_item *
+hash_table_find_item(const struct keymap *map, const void *key,
+ unsigned key_hash)
+{
+ struct cso_hash_iter iter = hash_table_find_iter(map, key, key_hash);
+ if (cso_hash_iter_is_null(iter)) {
+ return NULL;
+ }
+ else {
+ return hash_table_item(iter);
+ }
+}
+
+
+/**
+ * Insert a new key + data pointer into the table.
+ * Note: we create a copy of the key, but not the data!
+ * If the key is already present in the table, replace the existing
+ * entry (calling the delete callback on the previous entry).
+ * If the maximum capacity of the map is reached an old entry
+ * will be deleted (the delete callback will be called).
+ */
+boolean
+util_keymap_insert(struct keymap *map, const void *key,
+ const void *data, void *user)
+{
+ unsigned key_hash;
+ struct keymap_item *item;
+ struct cso_hash_iter iter;
+
+ assert(map);
+
+ key_hash = hash(key, map->key_size);
+
+ item = hash_table_find_item(map, key, key_hash);
+ if (item) {
+ /* call delete callback for old entry/item */
+ map->delete_func(map, item->key, item->value, user);
+ item->value = (void *) data;
+ return TRUE;
+ }
+
+ item = MALLOC_STRUCT(keymap_item);
+ if (!item)
+ return FALSE;
+
+ item->key = mem_dup(key, map->key_size);
+ item->value = (void *) data;
+
+ iter = cso_hash_insert(map->cso, key_hash, item);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(item);
+ return FALSE;
+ }
+
+ map->num_entries++;
+
+ return TRUE;
+}
+
+
+/**
+ * Look up a key in the map and return the associated data pointer.
+ */
+const void *
+util_keymap_lookup(const struct keymap *map, const void *key)
+{
+ unsigned key_hash;
+ struct keymap_item *item;
+
+ assert(map);
+
+ key_hash = hash(key, map->key_size);
+
+ item = hash_table_find_item(map, key, key_hash);
+ if (!item)
+ return NULL;
+
+ return item->value;
+}
+
+
+/**
+ * Remove an entry from the map.
+ * The delete callback will be called if the given key/entry is found.
+ * \param user passed to the delete callback as the last param.
+ */
+void
+util_keymap_remove(struct keymap *map, const void *key, void *user)
+{
+ unsigned key_hash;
+ struct cso_hash_iter iter;
+ struct keymap_item *item;
+
+ assert(map);
+
+ key_hash = hash(key, map->key_size);
+
+ iter = hash_table_find_iter(map, key, key_hash);
+ if (cso_hash_iter_is_null(iter))
+ return;
+
+ item = hash_table_item(iter);
+ assert(item);
+ map->delete_func(map, item->key, item->value, user);
+ FREE(item->key);
+ FREE(item);
+
+ map->num_entries--;
+
+ cso_hash_erase(map->cso, iter);
+}
+
+
+/**
+ * Remove all entries from the map, calling the delete callback for each.
+ * \param user passed to the delete callback as the last param.
+ */
+void
+util_keymap_remove_all(struct keymap *map, void *user)
+{
+ struct cso_hash_iter iter;
+ struct keymap_item *item;
+
+ assert(map);
+
+ iter = cso_hash_first_node(map->cso);
+ while (!cso_hash_iter_is_null(iter)) {
+ item = (struct keymap_item *)
+ cso_hash_take(map->cso, cso_hash_iter_key(iter));
+ map->delete_func(map, item->key, item->value, user);
+ FREE(item->key);
+ FREE(item);
+ iter = cso_hash_first_node(map->cso);
+ }
+}
+
+
+extern void
+util_keymap_info(const struct keymap *map)
+{
+ debug_printf("Keymap %p: %u of max %u entries\n",
+ (void *) map, map->num_entries, map->max_entries);
+}
diff --git a/src/gallium/auxiliary/util/u_keymap.h b/src/gallium/auxiliary/util/u_keymap.h
new file mode 100644
index 0000000000..8d60a76fc3
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_keymap.h
@@ -0,0 +1,68 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_KEYMAP_H
+#define U_KEYMAP_H
+
+#include "pipe/p_compiler.h"
+
+
+/** opaque keymap type */
+struct keymap;
+
+
+/** Delete/callback function type */
+typedef void (*keymap_delete_func)(const struct keymap *map,
+ const void *key, void *data,
+ void *user);
+
+
+extern struct keymap *
+util_new_keymap(unsigned keySize, unsigned maxEntries,
+ keymap_delete_func deleteFunc);
+
+extern void
+util_delete_keymap(struct keymap *map, void *user);
+
+extern boolean
+util_keymap_insert(struct keymap *map, const void *key,
+ const void *data, void *user);
+
+extern const void *
+util_keymap_lookup(const struct keymap *map, const void *key);
+
+extern void
+util_keymap_remove(struct keymap *map, const void *key, void *user);
+
+extern void
+util_keymap_remove_all(struct keymap *map, void *user);
+
+extern void
+util_keymap_info(const struct keymap *map);
+
+
+#endif /* U_KEYMAP_H */
diff --git a/src/gallium/auxiliary/util/u_linear.c b/src/gallium/auxiliary/util/u_linear.c
new file mode 100644
index 0000000000..6be365e53b
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linear.c
@@ -0,0 +1,70 @@
+
+#include "util/u_debug.h"
+#include "u_linear.h"
+
+void
+pipe_linear_to_tile(size_t src_stride, const void *src_ptr,
+ struct pipe_tile_info *t, void *dst_ptr)
+{
+ int x, y, z;
+ char *ptr;
+ size_t bytes = t->cols * t->block.size;
+ char *dst_ptr2 = (char *) dst_ptr;
+
+ assert(pipe_linear_check_tile(t));
+
+ /* lets write lineary to the tiled buffer */
+ for (y = 0; y < t->tiles_y; y++) {
+ for (x = 0; x < t->tiles_x; x++) {
+ /* this inner loop could be replace with SSE magic */
+ ptr = (char*)src_ptr + src_stride * t->rows * y + bytes * x;
+ for (z = 0; z < t->rows; z++) {
+ memcpy(dst_ptr2, ptr, bytes);
+ dst_ptr2 += bytes;
+ ptr += src_stride;
+ }
+ }
+ }
+}
+
+void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr,
+ size_t dst_stride, void *dst_ptr)
+{
+ int x, y, z;
+ char *ptr;
+ size_t bytes = t->cols * t->block.size;
+ const char *src_ptr2 = (const char *) src_ptr;
+
+ /* lets read lineary from the tiled buffer */
+ for (y = 0; y < t->tiles_y; y++) {
+ for (x = 0; x < t->tiles_x; x++) {
+ /* this inner loop could be replace with SSE magic */
+ ptr = (char*)dst_ptr + dst_stride * t->rows * y + bytes * x;
+ for (z = 0; z < t->rows; z++) {
+ memcpy(ptr, src_ptr2, bytes);
+ src_ptr2 += bytes;
+ ptr += dst_stride;
+ }
+ }
+ }
+}
+
+void
+pipe_linear_fill_info(struct pipe_tile_info *t,
+ const struct pipe_format_block *block,
+ unsigned tile_width, unsigned tile_height,
+ unsigned tiles_x, unsigned tiles_y)
+{
+ t->block = *block;
+
+ t->tile.width = tile_width;
+ t->tile.height = tile_height;
+ t->cols = t->tile.width / t->block.width;
+ t->rows = t->tile.height / t->block.height;
+ t->tile.size = t->cols * t->rows * t->block.size;
+
+ t->tiles_x = tiles_x;
+ t->tiles_y = tiles_y;
+ t->stride = t->cols * t->tiles_x * t->block.size;
+ t->size = t->tiles_x * t->tiles_y * t->tile.size;
+}
diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h
new file mode 100644
index 0000000000..1589f029bc
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linear.h
@@ -0,0 +1,61 @@
+
+#ifndef U_LINEAR_H
+#define U_LINEAR_H
+
+#include "pipe/p_format.h"
+
+struct pipe_tile_info
+{
+ unsigned size;
+ unsigned stride;
+
+ /* The number of tiles */
+ unsigned tiles_x;
+ unsigned tiles_y;
+
+ /* size of each tile expressed in blocks */
+ unsigned cols;
+ unsigned rows;
+
+ /* Describe the tile in pixels */
+ struct pipe_format_block tile;
+
+ /* Describe each block within the tile */
+ struct pipe_format_block block;
+};
+
+void pipe_linear_to_tile(size_t src_stride, const void *src_ptr,
+ struct pipe_tile_info *t, void *dst_ptr);
+
+void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr,
+ size_t dst_stride, void *dst_ptr);
+
+/**
+ * Convenience function to fillout a pipe_tile_info struct.
+ * @t info to fill out.
+ * @block block info about pixel layout
+ * @tile_width the width of the tile in pixels
+ * @tile_height the height of the tile in pixels
+ * @tiles_x number of tiles in x axis
+ * @tiles_y number of tiles in y axis
+ */
+void pipe_linear_fill_info(struct pipe_tile_info *t,
+ const struct pipe_format_block *block,
+ unsigned tile_width, unsigned tile_height,
+ unsigned tiles_x, unsigned tiles_y);
+
+static INLINE boolean pipe_linear_check_tile(const struct pipe_tile_info *t)
+{
+ if (t->tile.size != t->block.size * t->cols * t->rows)
+ return FALSE;
+
+ if (t->stride != t->block.size * t->cols * t->tiles_x)
+ return FALSE;
+
+ if (t->size < t->stride * t->rows * t->tiles_y)
+ return FALSE;
+
+ return TRUE;
+}
+
+#endif /* U_LINEAR_H */
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 1ae3234423..1ecde7a912 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -40,7 +40,7 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#ifdef __cplusplus
@@ -68,7 +68,7 @@ __inline double ceil(double val)
return ceil_val;
}
-#ifndef PIPE_SUBSYSTEM_WINDOWS_CE
+#ifndef PIPE_SUBSYSTEM_WINDOWS_CE_OGL
__inline double floor(double val)
{
double floor_val;
@@ -341,6 +341,10 @@ unsigned ffs( unsigned u )
}
#endif
+#ifdef __MINGW32__
+#define ffs __builtin_ffs
+#endif
+
/**
* Return float bits.
diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h
index 79e34e185f..ceb3a1cb61 100644
--- a/src/gallium/auxiliary/util/u_memory.h
+++ b/src/gallium/auxiliary/util/u_memory.h
@@ -36,7 +36,7 @@
#include "util/u_pointer.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#ifdef __cplusplus
@@ -56,7 +56,7 @@ extern "C" {
/* memory debugging */
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#define MALLOC( _size ) \
debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size )
@@ -151,6 +151,8 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T))
+#define CALLOC_VARIANT_LENGTH_STRUCT(T,more_size) ((struct T *) CALLOC(1, sizeof(struct T) + more_size))
+
/**
* Return memory on given byte alignment
diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c
index 01dd67c810..151a480d34 100644
--- a/src/gallium/auxiliary/util/u_mm.c
+++ b/src/gallium/auxiliary/util/u_mm.c
@@ -24,14 +24,14 @@
#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_mm.h"
void
-mmDumpMemInfo(const struct mem_block *heap)
+u_mmDumpMemInfo(const struct mem_block *heap)
{
debug_printf("Memory heap %p:\n", (void *)heap);
if (heap == 0) {
@@ -58,7 +58,7 @@ mmDumpMemInfo(const struct mem_block *heap)
}
struct mem_block *
-mmInit(int ofs, int size)
+u_mmInit(int ofs, int size)
{
struct mem_block *heap, *block;
@@ -165,7 +165,7 @@ SliceBlock(struct mem_block *p,
struct mem_block *
-mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
+u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
{
struct mem_block *p;
const int mask = (1 << align2)-1;
@@ -202,7 +202,7 @@ mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
struct mem_block *
-mmFindBlock(struct mem_block *heap, int start)
+u_mmFindBlock(struct mem_block *heap, int start)
{
struct mem_block *p;
@@ -241,7 +241,7 @@ Join2Blocks(struct mem_block *p)
}
int
-mmFreeMem(struct mem_block *b)
+u_mmFreeMem(struct mem_block *b)
{
if (!b)
return 0;
@@ -270,7 +270,7 @@ mmFreeMem(struct mem_block *b)
void
-mmDestroy(struct mem_block *heap)
+u_mmDestroy(struct mem_block *heap)
{
struct mem_block *p;
diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h
index b226b101cb..ce20e48763 100644
--- a/src/gallium/auxiliary/util/u_mm.h
+++ b/src/gallium/auxiliary/util/u_mm.h
@@ -49,7 +49,7 @@ struct mem_block {
* input: total size in bytes
* return: a heap pointer if OK, NULL if error
*/
-extern struct mem_block *mmInit(int ofs, int size);
+extern struct mem_block *u_mmInit(int ofs, int size);
/**
* Allocate 'size' bytes with 2^align2 bytes alignment,
@@ -61,7 +61,7 @@ extern struct mem_block *mmInit(int ofs, int size);
* startSearch = linear offset from start of heap to begin search
* return: pointer to the allocated block, 0 if error
*/
-extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2,
+extern struct mem_block *u_mmAllocMem(struct mem_block *heap, int size, int align2,
int startSearch);
/**
@@ -69,23 +69,23 @@ extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2
* input: pointer to a block
* return: 0 if OK, -1 if error
*/
-extern int mmFreeMem(struct mem_block *b);
+extern int u_mmFreeMem(struct mem_block *b);
/**
* Free block starts at offset
* input: pointer to a heap, start offset
* return: pointer to a block
*/
-extern struct mem_block *mmFindBlock(struct mem_block *heap, int start);
+extern struct mem_block *u_mmFindBlock(struct mem_block *heap, int start);
/**
* destroy MM
*/
-extern void mmDestroy(struct mem_block *mmInit);
+extern void u_mmDestroy(struct mem_block *mmInit);
/**
* For debuging purpose.
*/
-extern void mmDumpMemInfo(const struct mem_block *mmInit);
+extern void u_mmDumpMemInfo(const struct mem_block *mmInit);
#endif
diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h
index e45e84ded2..d7c3995dbf 100644
--- a/src/gallium/auxiliary/util/u_prim.h
+++ b/src/gallium/auxiliary/util/u_prim.h
@@ -119,4 +119,20 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
}
+static INLINE boolean u_reduced_prim( unsigned pipe_prim )
+{
+ switch (pipe_prim) {
+ case PIPE_PRIM_POINTS:
+ return PIPE_PRIM_POINTS;
+
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_LINE_LOOP:
+ return PIPE_PRIM_LINES;
+
+ default:
+ return PIPE_PRIM_TRIANGLES;
+ }
+}
+
#endif
diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c
new file mode 100644
index 0000000000..089bbbc48a
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_simple_screen.c
@@ -0,0 +1,143 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "u_simple_screen.h"
+
+#include "pipe/p_screen.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+
+static struct pipe_buffer *
+pass_buffer_create(struct pipe_screen *screen,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size)
+{
+ return screen->winsys->buffer_create(screen->winsys,
+ alignment, usage, size);
+}
+
+static struct pipe_buffer *
+pass_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes)
+{
+ return screen->winsys->user_buffer_create(screen->winsys,
+ ptr, bytes);
+}
+
+static struct pipe_buffer *
+pass_surface_buffer_create(struct pipe_screen *screen,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned usage,
+ unsigned *stride)
+{
+ return screen->winsys->surface_buffer_create(screen->winsys,
+ width, height,
+ format, usage, stride);
+}
+
+static void *
+pass_buffer_map(struct pipe_screen *screen,
+ struct pipe_buffer *buf,
+ unsigned usage)
+{
+ return screen->winsys->buffer_map(screen->winsys,
+ buf, usage);
+}
+
+static void
+pass_buffer_unmap(struct pipe_screen *screen,
+ struct pipe_buffer *buf)
+{
+ screen->winsys->buffer_unmap(screen->winsys, buf);
+}
+
+static void
+pass_buffer_destroy(struct pipe_screen *screen,
+ struct pipe_buffer *buf)
+{
+ screen->winsys->buffer_destroy(screen->winsys, buf);
+}
+
+
+static void
+pass_flush_frontbuffer(struct pipe_screen *screen,
+ struct pipe_surface *surf,
+ void *context_private)
+{
+ screen->winsys->flush_frontbuffer(screen->winsys,
+ surf, context_private);
+}
+
+static void
+pass_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ screen->winsys->fence_reference(screen->winsys,
+ ptr, fence);
+}
+
+static int
+pass_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ return screen->winsys->fence_signalled(screen->winsys,
+ fence, flag);
+}
+
+static int
+pass_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ return screen->winsys->fence_finish(screen->winsys,
+ fence, flag);
+}
+
+void u_simple_screen_init(struct pipe_screen *screen)
+{
+ screen->buffer_create = pass_buffer_create;
+ screen->user_buffer_create = pass_user_buffer_create;
+ screen->surface_buffer_create = pass_surface_buffer_create;
+
+ screen->buffer_map = pass_buffer_map;
+ screen->buffer_unmap = pass_buffer_unmap;
+ screen->buffer_destroy = pass_buffer_destroy;
+ screen->flush_frontbuffer = pass_flush_frontbuffer;
+ screen->fence_reference = pass_fence_reference;
+ screen->fence_signalled = pass_fence_signalled;
+ screen->fence_finish = pass_fence_finish;
+}
+
+const char* u_simple_screen_winsys_name(struct pipe_screen *screen)
+{
+ return screen->winsys->get_name(screen->winsys);
+}
diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h
new file mode 100644
index 0000000000..6612a8a7c0
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_simple_screen.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_SIMPLE_SCREEN_H
+#define U_SIMPLE_SCREEN_H
+
+struct pipe_screen;
+struct pipe_winsys;
+
+/**
+ * The following function initializes a simple passthrough screen.
+ *
+ * All the relevant screen function pointers will forwarded to the
+ * winsys.
+ */
+void u_simple_screen_init(struct pipe_screen *screen);
+
+/**
+ * Returns the name of the winsys associated with this screen.
+ */
+const char* u_simple_screen_winsys_name(struct pipe_screen *screen);
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index f06d13c2c4..3cd2d52c64 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -34,10 +34,10 @@
#include "pipe/p_context.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
+#include "pipe/p_screen.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_memory.h"
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
new file mode 100644
index 0000000000..e2a8491e62
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -0,0 +1,77 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * SSE intrinsics portability header.
+ *
+ * Although the SSE intrinsics are support by all modern x86 and x86-64
+ * compilers, there are some intrisincs missing in some implementations
+ * (especially older MSVC versions). This header abstracts that away.
+ */
+
+#ifndef U_SSE_H_
+#define U_SSE_H_
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_SSE)
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+
+/* MSVC before VC8 does not support the _mm_castxxx_yyy */
+#if defined(_MSC_VER) && _MSC_VER < 1500
+
+union __declspec(align(16)) m128_types {
+ __m128 m128;
+ __m128i m128i;
+ __m128d m128d;
+};
+
+static __inline __m128
+_mm_castsi128_ps(__m128i a)
+{
+ union m128_types u;
+ u.m128i = a;
+ return u.m128;
+}
+
+static __inline __m128i
+_mm_castps_si128(__m128 a)
+{
+ union m128_types u;
+ u.m128 = a;
+ return u.m128i;
+}
+
+#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */
+
+#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
+
+#endif /* U_SSE_H_ */
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 853c503f4f..32f6b072a0 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -460,7 +460,7 @@ l8_put_tile_rgba(ubyte *dst,
for (j = 0; j < w; j++, pRow += 4) {
unsigned r;
r = float_to_ubyte(pRow[0]);
- *dst++ = r;
+ *dst++ = (ubyte) r;
}
p += src_stride;
}
@@ -504,7 +504,7 @@ a8_put_tile_rgba(ubyte *dst,
for (j = 0; j < w; j++, pRow += 4) {
unsigned a;
a = float_to_ubyte(pRow[3]);
- *dst++ = a;
+ *dst++ = (ubyte) a;
}
p += src_stride;
}
@@ -634,7 +634,7 @@ i8_put_tile_rgba(ubyte *dst,
for (j = 0; j < w; j++, pRow += 4) {
unsigned r;
r = float_to_ubyte(pRow[0]);
- *dst++ = r;
+ *dst++ = (ubyte) r;
}
p += src_stride;
}
@@ -769,6 +769,32 @@ z24s8_get_tile_rgba(const unsigned *src,
}
+/*** PIPE_FORMAT_Z32_FLOAT ***/
+
+/**
+ * Return each Z value as four floats in [0,1].
+ */
+static void
+z32f_get_tile_rgba(const float *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = *src++;
+ }
+ p += dst_stride;
+ }
+}
+
+
/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/
/**
@@ -913,6 +939,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
case PIPE_FORMAT_Z24S8_UNORM:
z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride);
+ break;
case PIPE_FORMAT_YCBCR:
ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE);
break;
diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c
index f84514165a..dde2c74fa8 100644
--- a/src/gallium/auxiliary/util/u_time.c
+++ b/src/gallium/auxiliary/util/u_time.c
@@ -200,7 +200,7 @@ util_time_timeout(const struct util_time *start,
}
-#if defined(PIPE_SUBSYSYEM_WINDOWS_DISPLAY)
+#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
void util_time_sleep(unsigned usecs)
{
LONGLONG start, curr, end;
diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c
index 8beb3b4c88..f237e12d73 100644
--- a/src/gallium/auxiliary/util/u_timed_winsys.c
+++ b/src/gallium/auxiliary/util/u_timed_winsys.c
@@ -29,7 +29,7 @@
* Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com>
*/
-#include "pipe/p_winsys.h"
+#include "pipe/internal/p_winsys_screen.h"
#include "u_timed_winsys.h"
#include "util/u_memory.h"
#include "util/u_time.h"
@@ -121,7 +121,8 @@ timed_buffer_create(struct pipe_winsys *winsys,
struct pipe_winsys *backend = timed_winsys(winsys)->backend;
uint64_t start = time_start();
- struct pipe_buffer *buf = backend->buffer_create( backend, alignment, usage, size );
+ struct pipe_buffer *buf =
+ backend->buffer_create( backend, alignment, usage, size );
time_finish(winsys, start, 0, __FUNCTION__);
@@ -205,34 +206,18 @@ timed_flush_frontbuffer( struct pipe_winsys *winsys,
-static struct pipe_surface *
-timed_surface_alloc(struct pipe_winsys *winsys)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- uint64_t start = time_start();
-
- struct pipe_surface *surf = backend->surface_alloc( backend );
-
- time_finish(winsys, start, 6, __FUNCTION__);
-
- return surf;
-}
-
-
-
-static int
-timed_surface_alloc_storage(struct pipe_winsys *winsys,
- struct pipe_surface *surf,
+static struct pipe_buffer *
+timed_surface_buffer_create(struct pipe_winsys *winsys,
unsigned width, unsigned height,
enum pipe_format format,
- unsigned flags,
- unsigned tex_usage)
+ unsigned usage,
+ unsigned *stride)
{
struct pipe_winsys *backend = timed_winsys(winsys)->backend;
uint64_t start = time_start();
- int ret = backend->surface_alloc_storage( backend, surf, width, height,
- format, flags, tex_usage );
+ struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height,
+ format, usage, stride );
time_finish(winsys, start, 7, __FUNCTION__);
@@ -240,19 +225,6 @@ timed_surface_alloc_storage(struct pipe_winsys *winsys,
}
-static void
-timed_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
-{
- struct pipe_winsys *backend = timed_winsys(winsys)->backend;
- uint64_t start = time_start();
-
- backend->surface_release( backend, s );
-
- time_finish(winsys, start, 8, __FUNCTION__);
-}
-
-
-
static const char *
timed_get_name( struct pipe_winsys *winsys )
{
@@ -329,11 +301,9 @@ struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend )
ws->base.buffer_unmap = timed_buffer_unmap;
ws->base.buffer_destroy = timed_buffer_destroy;
ws->base.buffer_create = timed_buffer_create;
+ ws->base.surface_buffer_create = timed_surface_buffer_create;
ws->base.flush_frontbuffer = timed_flush_frontbuffer;
ws->base.get_name = timed_get_name;
- ws->base.surface_alloc = timed_surface_alloc;
- ws->base.surface_alloc_storage = timed_surface_alloc_storage;
- ws->base.surface_release = timed_surface_release;
ws->base.fence_reference = timed_fence_reference;
ws->base.fence_signalled = timed_fence_signalled;
ws->base.fence_finish = timed_fence_finish;