summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.c48
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.h28
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c418
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h64
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_hash.c23
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_hash.h4
-rw-r--r--src/gallium/auxiliary/draw/Makefile58
-rw-r--r--src/gallium/auxiliary/draw/SConscript56
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c234
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h55
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c222
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.h125
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c (renamed from src/gallium/auxiliary/draw/draw_aaline.c)134
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c (renamed from src/gallium/auxiliary/draw/draw_aapoint.c)88
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c (renamed from src/gallium/auxiliary/draw/draw_clip.c)80
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_cull.c (renamed from src/gallium/auxiliary/draw/draw_cull.c)31
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_flatshade.c (renamed from src/gallium/auxiliary/draw/draw_flatshade.c)29
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_offset.c (renamed from src/gallium/auxiliary/draw/draw_offset.c)29
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c (renamed from src/gallium/auxiliary/draw/draw_pstipple.c)130
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_stipple.c (renamed from src/gallium/auxiliary/draw/draw_stipple.c)22
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_twoside.c (renamed from src/gallium/auxiliary/draw/draw_twoside.c)37
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_unfilled.c (renamed from src/gallium/auxiliary/draw/draw_unfilled.c)53
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_util.c137
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c (renamed from src/gallium/auxiliary/draw/draw_validate.c)41
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c (renamed from src/gallium/auxiliary/draw/draw_vbuf.c)368
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_line.c (renamed from src/gallium/auxiliary/draw/draw_wide_line.c)18
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c (renamed from src/gallium/auxiliary/draw/draw_wide_point.c)43
-rw-r--r--src/gallium/auxiliary/draw/draw_prim.c523
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h359
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c240
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h110
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_elts.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c203
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c191
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c236
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c328
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c235
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_pipeline.c168
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c227
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c249
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp.h144
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c357
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h174
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.c59
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h4
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex_cache.c208
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex_fetch.c528
-rw-r--r--src/gallium/auxiliary/draw/draw_vf.c432
-rw-r--r--src/gallium/auxiliary/draw/draw_vf.h236
-rw-r--r--src/gallium/auxiliary/draw/draw_vf_generic.c585
-rw-r--r--src/gallium/auxiliary/draw/draw_vf_sse.c613
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c (renamed from src/gallium/auxiliary/draw/draw_vertex_shader.c)52
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h39
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c230
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c169
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c235
-rw-r--r--src/gallium/auxiliary/draw/draw_wide_prims.c366
-rw-r--r--src/gallium/auxiliary/gallivm/Makefile8
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm.cpp9
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm.h13
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_builtins.cpp707
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_cpu.cpp56
-rw-r--r--src/gallium/auxiliary/gallivm/instructions.cpp83
-rw-r--r--src/gallium/auxiliary/gallivm/instructions.h4
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.cpp196
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.h13
-rw-r--r--src/gallium/auxiliary/gallivm/llvm_builtins.c3
-rw-r--r--src/gallium/auxiliary/gallivm/soabuiltins.c133
-rw-r--r--src/gallium/auxiliary/gallivm/storage.cpp48
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.cpp18
-rw-r--r--src/gallium/auxiliary/gallivm/tgsitollvm.cpp10
-rw-r--r--src/gallium/auxiliary/pipebuffer/Makefile2
-rw-r--r--src/gallium/auxiliary/pipebuffer/SConscript2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer.h11
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c136
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr.h38
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c344
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c8
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c495
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_cpu.c23
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c410
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h21
-rw-r--r--src/gallium/auxiliary/sct/sct.c1
-rw-r--r--src/gallium/auxiliary/tgsi/exec/Makefile2
-rw-r--r--src/gallium/auxiliary/tgsi/exec/tgsi_exec.c171
-rw-r--r--src/gallium/auxiliary/tgsi/exec/tgsi_exec.h12
-rwxr-xr-xsrc/gallium/auxiliary/tgsi/exec/tgsi_sse2.c1090
-rwxr-xr-xsrc/gallium/auxiliary/tgsi/exec/tgsi_sse2.h11
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_dump.c439
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_dump.h10
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_parse.c19
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_parse.h37
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_scan.c15
-rw-r--r--src/gallium/auxiliary/translate/Makefile15
-rw-r--r--src/gallium/auxiliary/translate/SConscript12
-rw-r--r--src/gallium/auxiliary/translate/translate.c (renamed from src/gallium/auxiliary/draw/draw_debug.c)87
-rw-r--r--src/gallium/auxiliary/translate/translate.h127
-rw-r--r--src/gallium/auxiliary/translate/translate_cache.c102
-rw-r--r--src/gallium/auxiliary/translate/translate_cache.h54
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c676
-rw-r--r--src/gallium/auxiliary/translate/translate_sse.c625
-rw-r--r--src/gallium/auxiliary/util/Makefile4
-rw-r--r--src/gallium/auxiliary/util/SConscript1
-rw-r--r--src/gallium/auxiliary/util/p_debug.c178
-rw-r--r--src/gallium/auxiliary/util/p_debug_mem.c98
-rw-r--r--src/gallium/auxiliary/util/p_tile.c80
-rw-r--r--src/gallium/auxiliary/util/u_blit.c25
-rw-r--r--src/gallium/auxiliary/util/u_blit.h8
-rw-r--r--src/gallium/auxiliary/util/u_double_list.h8
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c80
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.h10
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h126
-rw-r--r--src/gallium/auxiliary/util/u_snprintf.c16
-rw-r--r--src/gallium/auxiliary/util/u_string.h63
-rw-r--r--src/gallium/auxiliary/util/u_time.c162
-rw-r--r--src/gallium/auxiliary/util/u_time.h101
116 files changed, 9022 insertions, 8315 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index 18acab0967..36dc46ff80 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -43,6 +43,9 @@ struct cso_cache {
struct cso_hash *rasterizer_hash;
struct cso_hash *sampler_hash;
int max_size;
+
+ cso_sanitize_callback sanitize_cb;
+ void *sanitize_data;
};
#if 1
@@ -205,9 +208,20 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type)
}
}
-static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
+
+static INLINE void sanitize_hash(struct cso_cache *sc,
+ struct cso_hash *hash,
+ enum cso_cache_type type,
int max_size)
{
+ if (sc->sanitize_cb)
+ sc->sanitize_cb(hash, type, max_size, sc->sanitize_data);
+}
+
+
+static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
+ int max_size, void *user_data)
+{
/* if we're approach the maximum size, remove fourth of the entries
* otherwise every subsequent call will go through the same */
int hash_size = cso_hash_size(hash);
@@ -231,7 +245,7 @@ cso_insert_state(struct cso_cache *sc,
void *state)
{
struct cso_hash *hash = _cso_hash_for_type(sc, type);
- sanitize_hash(hash, type, sc->max_size);
+ sanitize_hash(sc, hash, type, sc->max_size);
return cso_hash_insert(hash, hash_key, state);
}
@@ -255,9 +269,9 @@ void *cso_hash_find_data_from_template( struct cso_hash *hash,
while (!cso_hash_iter_is_null(iter)) {
void *iter_data = cso_hash_iter_data(iter);
if (!memcmp(iter_data, templ, size)) {
- /* Return the payload:
+ /* We found a match
*/
- return (unsigned char *)iter_data + size;
+ return iter_data;
}
iter = cso_hash_iter_next(iter);
}
@@ -290,6 +304,8 @@ void * cso_take_state(struct cso_cache *sc,
struct cso_cache *cso_cache_create(void)
{
struct cso_cache *sc = MALLOC_STRUCT(cso_cache);
+ if (sc == NULL)
+ return NULL;
sc->max_size = 4096;
sc->blend_hash = cso_hash_create();
@@ -298,6 +314,8 @@ struct cso_cache *cso_cache_create(void)
sc->rasterizer_hash = cso_hash_create();
sc->fs_hash = cso_hash_create();
sc->vs_hash = cso_hash_create();
+ sc->sanitize_cb = sanitize_cb;
+ sc->sanitize_data = 0;
return sc;
}
@@ -332,10 +350,10 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
iter = cso_hash_first_node(hash);
while (!cso_hash_iter_is_null(iter)) {
void *state = cso_hash_iter_data(iter);
+ iter = cso_hash_iter_next(iter);
if (state) {
func(state, user_data);
}
- iter = cso_hash_iter_next(iter);
}
}
@@ -363,13 +381,13 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
{
sc->max_size = number;
- sanitize_hash(sc->blend_hash, CSO_BLEND, sc->max_size);
- sanitize_hash(sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA,
+ sanitize_hash(sc, sc->blend_hash, CSO_BLEND, sc->max_size);
+ sanitize_hash(sc, sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA,
sc->max_size);
- sanitize_hash(sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size);
- sanitize_hash(sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size);
- sanitize_hash(sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size);
- sanitize_hash(sc->sampler_hash, CSO_SAMPLER, sc->max_size);
+ sanitize_hash(sc, sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size);
+ sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size);
+ sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size);
+ sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size);
}
int cso_maximum_cache_size(const struct cso_cache *sc)
@@ -377,3 +395,11 @@ int cso_maximum_cache_size(const struct cso_cache *sc)
return sc->max_size;
}
+void cso_cache_set_sanitize_callback(struct cso_cache *sc,
+ cso_sanitize_callback cb,
+ void *user_data)
+{
+ sc->sanitize_cb = cb;
+ sc->sanitize_data = user_data;
+}
+
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h
index e5edbbb556..6b5c230e8f 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.h
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.h
@@ -84,8 +84,22 @@
extern "C" {
#endif
+enum cso_cache_type {
+ CSO_BLEND,
+ CSO_SAMPLER,
+ CSO_DEPTH_STENCIL_ALPHA,
+ CSO_RASTERIZER,
+ CSO_FRAGMENT_SHADER,
+ CSO_VERTEX_SHADER
+};
+
typedef void (*cso_state_callback)(void *ctx, void *obj);
+typedef void (*cso_sanitize_callback)(struct cso_hash *hash,
+ enum cso_cache_type type,
+ int max_size,
+ void *user_data);
+
struct cso_cache;
struct cso_blend {
@@ -130,21 +144,15 @@ struct cso_sampler {
struct pipe_context *context;
};
-
-enum cso_cache_type {
- CSO_BLEND,
- CSO_SAMPLER,
- CSO_DEPTH_STENCIL_ALPHA,
- CSO_RASTERIZER,
- CSO_FRAGMENT_SHADER,
- CSO_VERTEX_SHADER
-};
-
unsigned cso_construct_key(void *item, int item_size);
struct cso_cache *cso_cache_create(void);
void cso_cache_delete(struct cso_cache *sc);
+void cso_cache_set_sanitize_callback(struct cso_cache *sc,
+ cso_sanitize_callback cb,
+ void *user_data);
+
struct cso_hash_iter cso_insert_state(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type,
void *state);
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 4a1a6cb79c..a1a3a9efaf 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -25,16 +25,20 @@
*
**************************************************************************/
- /* Wrap the cso cache & hash mechanisms in a simplified
+ /**
+ * @file
+ *
+ * Wrap the cso cache & hash mechanisms in a simplified
* pipe-driver-specific interface.
*
- * Authors:
- * Zack Rusin <zack@tungstengraphics.com>
- * Keith Whitwell <keith@tungstengraphics.com>
+ * @author Zack Rusin <zack@tungstengraphics.com>
+ * @author Keith Whitwell <keith@tungstengraphics.com>
*/
#include "pipe/p_state.h"
#include "pipe/p_util.h"
+#include "pipe/p_inlines.h"
+#include "tgsi/util/tgsi_parse.h"
#include "cso_cache/cso_context.h"
#include "cso_cache/cso_cache.h"
@@ -76,6 +80,131 @@ struct cso_context {
};
+static boolean delete_blend_state(struct cso_context *ctx, void *state)
+{
+ struct cso_blend *cso = (struct cso_blend *)state;
+
+ if (ctx->blend == cso->data)
+ return FALSE;
+
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+ return TRUE;
+}
+
+static boolean delete_depth_stencil_state(struct cso_context *ctx, void *state)
+{
+ struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state;
+
+ if (ctx->depth_stencil == cso->data)
+ return FALSE;
+
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+
+ return TRUE;
+}
+
+static boolean delete_sampler_state(struct cso_context *ctx, void *state)
+{
+ struct cso_sampler *cso = (struct cso_sampler *)state;
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+ return TRUE;
+}
+
+static boolean delete_rasterizer_state(struct cso_context *ctx, void *state)
+{
+ struct cso_rasterizer *cso = (struct cso_rasterizer *)state;
+
+ if (ctx->rasterizer == cso->data)
+ return FALSE;
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+ return TRUE;
+}
+
+static boolean delete_fs_state(struct cso_context *ctx, void *state)
+{
+ struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state;
+ if (ctx->fragment_shader == cso->data)
+ return FALSE;
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+ return TRUE;
+}
+
+static boolean delete_vs_state(struct cso_context *ctx, void *state)
+{
+ struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state;
+ if (ctx->vertex_shader == cso->data)
+ return TRUE;
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+ return FALSE;
+}
+
+
+static INLINE boolean delete_cso(struct cso_context *ctx,
+ void *state, enum cso_cache_type type)
+{
+ switch (type) {
+ case CSO_BLEND:
+ return delete_blend_state(ctx, state);
+ break;
+ case CSO_SAMPLER:
+ return delete_sampler_state(ctx, state);
+ break;
+ case CSO_DEPTH_STENCIL_ALPHA:
+ return delete_depth_stencil_state(ctx, state);
+ break;
+ case CSO_RASTERIZER:
+ return delete_rasterizer_state(ctx, state);
+ break;
+ case CSO_FRAGMENT_SHADER:
+ return delete_fs_state(ctx, state);
+ break;
+ case CSO_VERTEX_SHADER:
+ return delete_vs_state(ctx, state);
+ break;
+ default:
+ assert(0);
+ FREE(state);
+ }
+ return FALSE;
+}
+
+static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
+ int max_size, void *user_data)
+{
+ struct cso_context *ctx = (struct cso_context *)user_data;
+ /* if we're approach the maximum size, remove fourth of the entries
+ * otherwise every subsequent call will go through the same */
+ int hash_size = cso_hash_size(hash);
+ int max_entries = (max_size > hash_size) ? max_size : hash_size;
+ int to_remove = (max_size < max_entries) * max_entries/4;
+ struct cso_hash_iter iter = cso_hash_first_node(hash);
+ if (hash_size > max_size)
+ to_remove += hash_size - max_size;
+ while (to_remove) {
+ /*remove elements until we're good */
+ /*fixme: currently we pick the nodes to remove at random*/
+ void *cso = cso_hash_iter_data(iter);
+ if (delete_cso(ctx, cso, type)) {
+ iter = cso_hash_erase(hash, iter);
+ --to_remove;
+ } else
+ iter = cso_hash_iter_next(iter);
+ }
+}
+
+
struct cso_context *cso_create_context( struct pipe_context *pipe )
{
struct cso_context *ctx = CALLOC_STRUCT(cso_context);
@@ -85,6 +214,9 @@ struct cso_context *cso_create_context( struct pipe_context *pipe )
ctx->cache = cso_cache_create();
if (ctx->cache == NULL)
goto out;
+ cso_cache_set_sanitize_callback(ctx->cache,
+ sanitize_hash,
+ ctx);
ctx->pipe = pipe;
@@ -98,8 +230,14 @@ out:
return NULL;
}
-static void cso_release_all( struct cso_context *ctx )
+
+/**
+ * Prior to context destruction, this function unbinds all state objects.
+ */
+void cso_release_all( struct cso_context *ctx )
{
+ unsigned i;
+
if (ctx->pipe) {
ctx->pipe->bind_blend_state( ctx->pipe, NULL );
ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL );
@@ -109,6 +247,11 @@ static void cso_release_all( struct cso_context *ctx )
ctx->pipe->bind_vs_state( ctx->pipe, NULL );
}
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_texture_reference(&ctx->textures[i], NULL);
+ pipe_texture_reference(&ctx->textures_saved[i], NULL);
+ }
+
if (ctx->cache) {
cso_cache_delete( ctx->cache );
ctx->cache = NULL;
@@ -118,10 +261,10 @@ static void cso_release_all( struct cso_context *ctx )
void cso_destroy_context( struct cso_context *ctx )
{
- if (ctx)
- cso_release_all( ctx );
-
- FREE( ctx );
+ if (ctx) {
+ //cso_release_all( ctx );
+ FREE( ctx );
+ }
}
@@ -135,8 +278,8 @@ void cso_destroy_context( struct cso_context *ctx )
* the data member of the cso to be the template itself.
*/
-void cso_set_blend(struct cso_context *ctx,
- const struct pipe_blend_state *templ)
+enum pipe_error cso_set_blend(struct cso_context *ctx,
+ const struct pipe_blend_state *templ)
{
unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state));
struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
@@ -146,6 +289,8 @@ void cso_set_blend(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_blend *cso = MALLOC(sizeof(struct cso_blend));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
cso->state = *templ;
cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state);
@@ -153,6 +298,11 @@ void cso_set_blend(struct cso_context *ctx,
cso->context = ctx->pipe;
iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
@@ -163,6 +313,7 @@ void cso_set_blend(struct cso_context *ctx,
ctx->blend = handle;
ctx->pipe->bind_blend_state(ctx->pipe, handle);
}
+ return PIPE_OK;
}
void cso_save_blend(struct cso_context *ctx)
@@ -182,12 +333,12 @@ void cso_restore_blend(struct cso_context *ctx)
-void cso_single_sampler(struct cso_context *ctx,
- unsigned idx,
- const struct pipe_sampler_state *templ)
+enum pipe_error cso_single_sampler(struct cso_context *ctx,
+ unsigned idx,
+ const struct pipe_sampler_state *templ)
{
void *handle = NULL;
-
+
if (templ != NULL) {
unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state));
struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
@@ -196,13 +347,20 @@ void cso_single_sampler(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler));
-
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
cso->state = *templ;
cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state;
cso->context = ctx->pipe;
iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
@@ -211,11 +369,12 @@ void cso_single_sampler(struct cso_context *ctx,
}
ctx->samplers[idx] = handle;
+ return PIPE_OK;
}
void cso_single_sampler_done( struct cso_context *ctx )
{
- unsigned i;
+ unsigned i;
/* find highest non-null sampler */
for (i = PIPE_MAX_SAMPLERS; i > 0; i--) {
@@ -226,8 +385,8 @@ void cso_single_sampler_done( struct cso_context *ctx )
ctx->nr_samplers = i;
if (ctx->hw.nr_samplers != ctx->nr_samplers ||
- memcmp(ctx->hw.samplers,
- ctx->samplers,
+ memcmp(ctx->hw.samplers,
+ ctx->samplers,
ctx->nr_samplers * sizeof(void *)) != 0)
{
memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *));
@@ -237,22 +396,36 @@ void cso_single_sampler_done( struct cso_context *ctx )
}
}
-void cso_set_samplers( struct cso_context *ctx,
- unsigned nr,
- const struct pipe_sampler_state **templates )
+/*
+ * If the function encouters any errors it will return the
+ * last one. Done to always try to set as many samplers
+ * as possible.
+ */
+enum pipe_error cso_set_samplers( struct cso_context *ctx,
+ unsigned nr,
+ const struct pipe_sampler_state **templates )
{
unsigned i;
-
+ enum pipe_error temp, error = PIPE_OK;
+
/* TODO: fastpath
*/
- for (i = 0; i < nr; i++)
- cso_single_sampler( ctx, i, templates[i] );
+ for (i = 0; i < nr; i++) {
+ temp = cso_single_sampler( ctx, i, templates[i] );
+ if (temp != PIPE_OK)
+ error = temp;
+ }
+
+ for ( ; i < ctx->nr_samplers; i++) {
+ temp = cso_single_sampler( ctx, i, NULL );
+ if (temp != PIPE_OK)
+ error = temp;
+ }
- for ( ; i < ctx->nr_samplers; i++)
- cso_single_sampler( ctx, i, NULL );
-
cso_single_sampler_done( ctx );
+
+ return error;
}
void cso_save_samplers(struct cso_context *ctx)
@@ -263,44 +436,64 @@ void cso_save_samplers(struct cso_context *ctx)
void cso_restore_samplers(struct cso_context *ctx)
{
- cso_set_samplers(ctx, ctx->nr_samplers_saved,
- (const struct pipe_sampler_state **) ctx->samplers_saved);
+ ctx->nr_samplers = ctx->nr_samplers_saved;
+ memcpy(ctx->samplers, ctx->samplers_saved, sizeof(ctx->samplers));
+ cso_single_sampler_done( ctx );
}
-void cso_set_sampler_textures( struct cso_context *ctx,
- uint count,
- struct pipe_texture **textures )
+enum pipe_error cso_set_sampler_textures( struct cso_context *ctx,
+ uint count,
+ struct pipe_texture **textures )
{
uint i;
ctx->nr_textures = count;
for (i = 0; i < count; i++)
- ctx->textures[i] = textures[i];
+ pipe_texture_reference(&ctx->textures[i], textures[i]);
for ( ; i < PIPE_MAX_SAMPLERS; i++)
- ctx->textures[i] = NULL;
+ pipe_texture_reference(&ctx->textures[i], NULL);
ctx->pipe->set_sampler_textures(ctx->pipe, count, textures);
+
+ return PIPE_OK;
}
void cso_save_sampler_textures( struct cso_context *ctx )
{
+ uint i;
+
ctx->nr_textures_saved = ctx->nr_textures;
- memcpy(ctx->textures_saved, ctx->textures, sizeof(ctx->textures));
+ for (i = 0; i < ctx->nr_textures; i++) {
+ assert(!ctx->textures_saved[i]);
+ pipe_texture_reference(&ctx->textures_saved[i], ctx->textures[i]);
+ }
}
void cso_restore_sampler_textures( struct cso_context *ctx )
{
- cso_set_sampler_textures(ctx, ctx->nr_textures_saved, ctx->textures_saved);
+ uint i;
+
+ ctx->nr_textures = ctx->nr_textures_saved;
+
+ for (i = 0; i < ctx->nr_textures; i++) {
+ pipe_texture_reference(&ctx->textures[i], NULL);
+ ctx->textures[i] = ctx->textures_saved[i];
+ ctx->textures_saved[i] = NULL;
+ }
+ for ( ; i < PIPE_MAX_SAMPLERS; i++)
+ pipe_texture_reference(&ctx->textures[i], NULL);
+
+ ctx->pipe->set_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures);
+
ctx->nr_textures_saved = 0;
}
-
-void cso_set_depth_stencil_alpha(struct cso_context *ctx,
- const struct pipe_depth_stencil_alpha_state *templ)
+enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx,
+ const struct pipe_depth_stencil_alpha_state *templ)
{
unsigned hash_key = cso_construct_key((void*)templ,
sizeof(struct pipe_depth_stencil_alpha_state));
@@ -312,13 +505,20 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
cso->state = *templ;
cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state;
cso->context = ctx->pipe;
- cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso);
+ iter = cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
@@ -329,6 +529,7 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx,
ctx->depth_stencil = handle;
ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle);
}
+ return PIPE_OK;
}
void cso_save_depth_stencil_alpha(struct cso_context *ctx)
@@ -348,8 +549,8 @@ void cso_restore_depth_stencil_alpha(struct cso_context *ctx)
-void cso_set_rasterizer(struct cso_context *ctx,
- const struct pipe_rasterizer_state *templ)
+enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
+ const struct pipe_rasterizer_state *templ)
{
unsigned hash_key = cso_construct_key((void*)templ,
sizeof(struct pipe_rasterizer_state));
@@ -360,13 +561,20 @@ void cso_set_rasterizer(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
cso->state = *templ;
cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state;
cso->context = ctx->pipe;
- cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso);
+ iter = cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
@@ -377,6 +585,7 @@ void cso_set_rasterizer(struct cso_context *ctx,
ctx->rasterizer = handle;
ctx->pipe->bind_rasterizer_state(ctx->pipe, handle);
}
+ return PIPE_OK;
}
void cso_save_rasterizer(struct cso_context *ctx)
@@ -395,36 +604,71 @@ void cso_restore_rasterizer(struct cso_context *ctx)
}
-void cso_set_fragment_shader(struct cso_context *ctx,
- const struct pipe_shader_state *templ)
+
+enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx,
+ void *handle )
{
- unsigned hash_key = cso_construct_key((void*)templ,
- sizeof(struct pipe_shader_state));
+ if (ctx->fragment_shader != handle) {
+ ctx->fragment_shader = handle;
+ ctx->pipe->bind_fs_state(ctx->pipe, handle);
+ }
+ return PIPE_OK;
+}
+
+void cso_delete_fragment_shader(struct cso_context *ctx, void *handle )
+{
+ if (handle == ctx->fragment_shader) {
+ /* unbind before deleting */
+ ctx->pipe->bind_fs_state(ctx->pipe, NULL);
+ ctx->fragment_shader = NULL;
+ }
+ ctx->pipe->delete_fs_state(ctx->pipe, handle);
+}
+
+/* Not really working:
+ */
+#if 0
+enum pipe_error cso_set_fragment_shader(struct cso_context *ctx,
+ const struct pipe_shader_state *templ)
+{
+ const struct tgsi_token *tokens = templ->tokens;
+ unsigned num_tokens = tgsi_num_tokens(tokens);
+ size_t tokens_size = num_tokens*sizeof(struct tgsi_token);
+ unsigned hash_key = cso_construct_key((void*)tokens, tokens_size);
struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
- hash_key, CSO_FRAGMENT_SHADER,
- (void*)templ);
+ hash_key,
+ CSO_FRAGMENT_SHADER,
+ (void*)tokens);
void *handle = NULL;
if (cso_hash_iter_is_null(iter)) {
- struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader));
+ struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader) + tokens_size);
+ struct tgsi_token *cso_tokens = (struct tgsi_token *)((char *)cso + sizeof(*cso));
- cso->state = *templ;
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ memcpy(cso_tokens, tokens, tokens_size);
+ cso->state.tokens = cso_tokens;
cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state;
cso->context = ctx->pipe;
iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data;
}
- if (ctx->fragment_shader != handle) {
- ctx->fragment_shader = handle;
- ctx->pipe->bind_fs_state(ctx->pipe, handle);
- }
+ return cso_set_fragment_shader_handle( ctx, handle );
}
+#endif
void cso_save_fragment_shader(struct cso_context *ctx)
{
@@ -434,7 +678,6 @@ void cso_save_fragment_shader(struct cso_context *ctx)
void cso_restore_fragment_shader(struct cso_context *ctx)
{
- assert(ctx->fragment_shader_saved);
if (ctx->fragment_shader_saved != ctx->fragment_shader) {
ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved);
ctx->fragment_shader = ctx->fragment_shader_saved;
@@ -443,9 +686,32 @@ void cso_restore_fragment_shader(struct cso_context *ctx)
}
+enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx,
+ void *handle )
+{
+ if (ctx->vertex_shader != handle) {
+ ctx->vertex_shader = handle;
+ ctx->pipe->bind_vs_state(ctx->pipe, handle);
+ }
+ return PIPE_OK;
+}
+
+void cso_delete_vertex_shader(struct cso_context *ctx, void *handle )
+{
+ if (handle == ctx->vertex_shader) {
+ /* unbind before deleting */
+ ctx->pipe->bind_vs_state(ctx->pipe, NULL);
+ ctx->vertex_shader = NULL;
+ }
+ ctx->pipe->delete_vs_state(ctx->pipe, handle);
+}
+
-void cso_set_vertex_shader(struct cso_context *ctx,
- const struct pipe_shader_state *templ)
+/* Not really working:
+ */
+#if 0
+enum pipe_error cso_set_vertex_shader(struct cso_context *ctx,
+ const struct pipe_shader_state *templ)
{
unsigned hash_key = cso_construct_key((void*)templ,
sizeof(struct pipe_shader_state));
@@ -457,23 +723,31 @@ void cso_set_vertex_shader(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
cso->state = *templ;
cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state;
cso->context = ctx->pipe;
iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data;
}
- if (ctx->vertex_shader != handle) {
- ctx->vertex_shader = handle;
- ctx->pipe->bind_vs_state(ctx->pipe, handle);
- }
+ return cso_set_vertex_shader_handle( ctx, handle );
}
+#endif
+
+
void cso_save_vertex_shader(struct cso_context *ctx)
{
@@ -483,9 +757,8 @@ void cso_save_vertex_shader(struct cso_context *ctx)
void cso_restore_vertex_shader(struct cso_context *ctx)
{
- assert(ctx->vertex_shader_saved);
if (ctx->vertex_shader_saved != ctx->vertex_shader) {
- ctx->pipe->bind_fs_state(ctx->pipe, ctx->vertex_shader_saved);
+ ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved);
ctx->vertex_shader = ctx->vertex_shader_saved;
}
ctx->vertex_shader_saved = NULL;
@@ -493,14 +766,15 @@ void cso_restore_vertex_shader(struct cso_context *ctx)
-void cso_set_framebuffer(struct cso_context *ctx,
- const struct pipe_framebuffer_state *fb)
+enum pipe_error cso_set_framebuffer(struct cso_context *ctx,
+ const struct pipe_framebuffer_state *fb)
{
/* XXX this memcmp() fails to detect buffer size changes */
if (1/*memcmp(&ctx->fb, fb, sizeof(*fb))*/) {
ctx->fb = *fb;
ctx->pipe->set_framebuffer_state(ctx->pipe, fb);
}
+ return PIPE_OK;
}
void cso_save_framebuffer(struct cso_context *ctx)
@@ -517,13 +791,14 @@ void cso_restore_framebuffer(struct cso_context *ctx)
}
-void cso_set_viewport(struct cso_context *ctx,
- const struct pipe_viewport_state *vp)
+enum pipe_error cso_set_viewport(struct cso_context *ctx,
+ const struct pipe_viewport_state *vp)
{
if (memcmp(&ctx->vp, vp, sizeof(*vp))) {
ctx->vp = *vp;
ctx->pipe->set_viewport_state(ctx->pipe, vp);
}
+ return PIPE_OK;
}
void cso_save_viewport(struct cso_context *ctx)
@@ -543,11 +818,12 @@ void cso_restore_viewport(struct cso_context *ctx)
-void cso_set_blend_color(struct cso_context *ctx,
- const struct pipe_blend_color *bc)
+enum pipe_error cso_set_blend_color(struct cso_context *ctx,
+ const struct pipe_blend_color *bc)
{
if (memcmp(&ctx->blend_color, bc, sizeof(ctx->blend_color))) {
ctx->blend_color = *bc;
ctx->pipe->set_blend_color(ctx->pipe, bc);
}
+ return PIPE_OK;
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index 665e8d9911..b04e98bfa1 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -31,6 +31,7 @@
#include "pipe/p_context.h"
#include "pipe/p_state.h"
+#include "pipe/p_error.h"
#ifdef __cplusplus
@@ -41,51 +42,53 @@ struct cso_context;
struct cso_context *cso_create_context( struct pipe_context *pipe );
+void cso_release_all( struct cso_context *ctx );
+
void cso_destroy_context( struct cso_context *cso );
-void cso_set_blend( struct cso_context *cso,
- const struct pipe_blend_state *blend );
+enum pipe_error cso_set_blend( struct cso_context *cso,
+ const struct pipe_blend_state *blend );
void cso_save_blend(struct cso_context *cso);
void cso_restore_blend(struct cso_context *cso);
-void cso_set_depth_stencil_alpha( struct cso_context *cso,
- const struct pipe_depth_stencil_alpha_state *dsa );
+enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso,
+ const struct pipe_depth_stencil_alpha_state *dsa );
void cso_save_depth_stencil_alpha(struct cso_context *cso);
void cso_restore_depth_stencil_alpha(struct cso_context *cso);
-void cso_set_rasterizer( struct cso_context *cso,
- const struct pipe_rasterizer_state *rasterizer );
+enum pipe_error cso_set_rasterizer( struct cso_context *cso,
+ const struct pipe_rasterizer_state *rasterizer );
void cso_save_rasterizer(struct cso_context *cso);
void cso_restore_rasterizer(struct cso_context *cso);
-void cso_set_samplers( struct cso_context *cso,
- unsigned count,
- const struct pipe_sampler_state **states );
+enum pipe_error cso_set_samplers( struct cso_context *cso,
+ unsigned count,
+ const struct pipe_sampler_state **states );
void cso_save_samplers(struct cso_context *cso);
void cso_restore_samplers(struct cso_context *cso);
/* Alternate interface to support state trackers that like to modify
* samplers one at a time:
*/
-void cso_single_sampler( struct cso_context *cso,
- unsigned nr,
- const struct pipe_sampler_state *states );
+enum pipe_error cso_single_sampler( struct cso_context *cso,
+ unsigned nr,
+ const struct pipe_sampler_state *states );
void cso_single_sampler_done( struct cso_context *cso );
-void cso_set_sampler_textures( struct cso_context *cso,
- uint count,
- struct pipe_texture **textures );
+enum pipe_error cso_set_sampler_textures( struct cso_context *cso,
+ uint count,
+ struct pipe_texture **textures );
void cso_save_sampler_textures( struct cso_context *cso );
void cso_restore_sampler_textures( struct cso_context *cso );
@@ -96,34 +99,43 @@ void cso_restore_sampler_textures( struct cso_context *cso );
* (eg mesa's internall-generated texenv programs), it will be up to
* the state tracker to implement their own specialized caching.
*/
-void cso_set_fragment_shader( struct cso_context *cso,
- const struct pipe_shader_state *shader );
+enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx,
+ void *handle );
+void cso_delete_fragment_shader(struct cso_context *ctx, void *handle );
+/*
+enum pipe_error cso_set_fragment_shader( struct cso_context *cso,
+ const struct pipe_shader_state *shader );
+*/
void cso_save_fragment_shader(struct cso_context *cso);
void cso_restore_fragment_shader(struct cso_context *cso);
-
-void cso_set_vertex_shader( struct cso_context *cso,
- const struct pipe_shader_state *shader );
+enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx,
+ void *handle );
+void cso_delete_vertex_shader(struct cso_context *ctx, void *handle );
+/*
+enum pipe_error cso_set_vertex_shader( struct cso_context *cso,
+ const struct pipe_shader_state *shader );
+*/
void cso_save_vertex_shader(struct cso_context *cso);
void cso_restore_vertex_shader(struct cso_context *cso);
-void cso_set_framebuffer(struct cso_context *cso,
- const struct pipe_framebuffer_state *fb);
+enum pipe_error cso_set_framebuffer(struct cso_context *cso,
+ const struct pipe_framebuffer_state *fb);
void cso_save_framebuffer(struct cso_context *cso);
void cso_restore_framebuffer(struct cso_context *cso);
-void cso_set_viewport(struct cso_context *cso,
- const struct pipe_viewport_state *vp);
+enum pipe_error cso_set_viewport(struct cso_context *cso,
+ const struct pipe_viewport_state *vp);
void cso_save_viewport(struct cso_context *cso);
void cso_restore_viewport(struct cso_context *cso);
-void cso_set_blend_color(struct cso_context *cso,
- const struct pipe_blend_color *bc);
+enum pipe_error cso_set_blend_color(struct cso_context *cso,
+ const struct pipe_blend_color *bc);
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c
index ddce3822f7..0646efd952 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.c
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.c
@@ -110,6 +110,10 @@ cso_hash_create_node(struct cso_hash *hash,
struct cso_node **anextNode)
{
struct cso_node *node = cso_data_allocate_node(hash->data.d);
+
+ if (!node)
+ return NULL;
+
node->key = akey;
node->value = avalue;
@@ -219,15 +223,30 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash,
{
struct cso_node **nextNode = cso_hash_find_node(hash, key);
struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode);
- struct cso_hash_iter iter = {hash, node};
- return iter;
+ if (!node) {
+ struct cso_hash_iter null_iter = {hash, 0};
+ return null_iter;
+ }
+
+ {
+ struct cso_hash_iter iter = {hash, node};
+ return iter;
+ }
}
}
struct cso_hash * cso_hash_create(void)
{
struct cso_hash *hash = MALLOC_STRUCT(cso_hash);
+ if (!hash)
+ return NULL;
+
hash->data.d = MALLOC_STRUCT(cso_hash_data);
+ if (!hash->data.d) {
+ FREE(hash);
+ return NULL;
+ }
+
hash->data.d->fakeNext = 0;
hash->data.d->buckets = 0;
hash->data.d->size = 0;
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h
index 73c4742006..85f3e276c6 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.h
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.h
@@ -106,12 +106,12 @@ struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter);
/**
- * Convenience routine to iterate over the collision list while doing a memory
+ * Convenience routine to iterate over the collision list while doing a memory
* comparison to see which entry in the list is a direct copy of our template
* and returns that entry.
*/
void *cso_hash_find_data_from_template( struct cso_hash *hash,
- unsigned hash_key,
+ unsigned hash_key,
void *templ,
int size );
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 28262a92c6..da7eded21f 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -4,39 +4,37 @@ include $(TOP)/configs/current
LIBNAME = draw
C_SOURCES = \
- draw_aaline.c \
- draw_aapoint.c \
- draw_clip.c \
- draw_vs_exec.c \
- draw_vs_sse.c \
- draw_vs_llvm.c \
- draw_context.c\
- draw_cull.c \
- draw_debug.c \
- draw_flatshade.c \
- draw_offset.c \
+ draw_context.c \
+ draw_pipe.c \
+ draw_pipe_aaline.c \
+ draw_pipe_aapoint.c \
+ draw_pipe_clip.c \
+ draw_pipe_cull.c \
+ draw_pipe_flatshade.c \
+ draw_pipe_offset.c \
+ draw_pipe_pstipple.c \
+ draw_pipe_stipple.c \
+ draw_pipe_twoside.c \
+ draw_pipe_unfilled.c \
+ draw_pipe_util.c \
+ draw_pipe_validate.c \
+ draw_pipe_vbuf.c \
+ draw_pipe_wide_line.c \
+ draw_pipe_wide_point.c \
draw_pt.c \
- draw_pt_vcache.c \
- draw_pt_fetch_emit.c \
- draw_pt_fetch_pipeline.c \
- draw_pt_pipeline.c \
draw_pt_elts.c \
- draw_prim.c \
- draw_pstipple.c \
- draw_stipple.c \
- draw_twoside.c \
- draw_unfilled.c \
- draw_validate.c \
- draw_vbuf.c \
+ draw_pt_emit.c \
+ draw_pt_fetch.c \
+ draw_pt_fetch_emit.c \
+ draw_pt_fetch_shade_pipeline.c \
+ draw_pt_post_vs.c \
+ draw_pt_varray.c \
+ draw_pt_vcache.c \
draw_vertex.c \
- draw_vertex_cache.c \
- draw_vertex_fetch.c \
- draw_vertex_shader.c \
- draw_vf.c \
- draw_vf_generic.c \
- draw_vf_sse.c \
- draw_wide_line.c \
- draw_wide_point.c
+ draw_vs.c \
+ draw_vs_exec.c \
+ draw_vs_llvm.c \
+ draw_vs_sse.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 52107912f5..3b5d5ed492 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -3,39 +3,37 @@ Import('*')
draw = env.ConvenienceLibrary(
target = 'draw',
source = [
- 'draw_aaline.c',
- 'draw_aapoint.c',
- 'draw_clip.c',
- 'draw_vs_exec.c',
- 'draw_vs_sse.c',
- 'draw_vs_llvm.c',
'draw_context.c',
- 'draw_cull.c',
- 'draw_debug.c',
- 'draw_flatshade.c',
- 'draw_offset.c',
+ 'draw_pipe.c',
+ 'draw_pipe_aaline.c',
+ 'draw_pipe_aapoint.c',
+ 'draw_pipe_clip.c',
+ 'draw_pipe_cull.c',
+ 'draw_pipe_flatshade.c',
+ 'draw_pipe_offset.c',
+ 'draw_pipe_pstipple.c',
+ 'draw_pipe_stipple.c',
+ 'draw_pipe_twoside.c',
+ 'draw_pipe_unfilled.c',
+ 'draw_pipe_util.c',
+ 'draw_pipe_validate.c',
+ 'draw_pipe_vbuf.c',
+ 'draw_pipe_wide_line.c',
+ 'draw_pipe_wide_point.c',
'draw_pt.c',
- 'draw_pt_vcache.c',
- 'draw_pt_fetch_emit.c',
- 'draw_pt_fetch_pipeline.c',
- 'draw_pt_pipeline.c',
'draw_pt_elts.c',
- 'draw_prim.c',
- 'draw_pstipple.c',
- 'draw_stipple.c',
- 'draw_twoside.c',
- 'draw_unfilled.c',
- 'draw_validate.c',
- 'draw_vbuf.c',
+ 'draw_pt_emit.c',
+ 'draw_pt_fetch.c',
+ 'draw_pt_fetch_emit.c',
+ 'draw_pt_fetch_shade_pipeline.c',
+ 'draw_pt_post_vs.c',
+ 'draw_pt_varray.c',
+ 'draw_pt_vcache.c',
'draw_vertex.c',
- 'draw_vertex_cache.c',
- 'draw_vertex_fetch.c',
- 'draw_vertex_shader.c',
- 'draw_vf.c',
- 'draw_vf_generic.c',
- 'draw_vf_sse.c',
- 'draw_wide_point.c',
- 'draw_wide_line.c'
+ 'draw_vs.c',
+ 'draw_vs_exec.c',
+ 'draw_vs_llvm.c',
+ 'draw_vs_sse.c',
])
auxiliaries.insert(0, draw)
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index b3c65c90d6..98e23fa830 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -33,8 +33,10 @@
#include "pipe/p_util.h"
#include "draw_context.h"
-#include "draw_private.h"
#include "draw_vbuf.h"
+#include "draw_vs.h"
+#include "draw_pt.h"
+#include "draw_pipe.h"
struct draw_context *draw_create( void )
@@ -43,38 +45,6 @@ struct draw_context *draw_create( void )
if (draw == NULL)
goto fail;
-#if defined(__i386__) || defined(__386__)
- draw->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL;
-#else
- draw->use_sse = FALSE;
-#endif
-
- /* create pipeline stages */
- draw->pipeline.wide_line = draw_wide_line_stage( draw );
- draw->pipeline.wide_point = draw_wide_point_stage( draw );
- draw->pipeline.stipple = draw_stipple_stage( draw );
- draw->pipeline.unfilled = draw_unfilled_stage( draw );
- draw->pipeline.twoside = draw_twoside_stage( draw );
- draw->pipeline.offset = draw_offset_stage( draw );
- draw->pipeline.clip = draw_clip_stage( draw );
- draw->pipeline.flatshade = draw_flatshade_stage( draw );
- draw->pipeline.cull = draw_cull_stage( draw );
- draw->pipeline.validate = draw_validate_stage( draw );
- draw->pipeline.first = draw->pipeline.validate;
-
- if (!draw->pipeline.wide_line ||
- !draw->pipeline.wide_point ||
- !draw->pipeline.stipple ||
- !draw->pipeline.unfilled ||
- !draw->pipeline.twoside ||
- !draw->pipeline.offset ||
- !draw->pipeline.clip ||
- !draw->pipeline.flatshade ||
- !draw->pipeline.cull ||
- !draw->pipeline.validate)
- goto fail;
-
-
ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 );
ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 );
ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 );
@@ -83,31 +53,18 @@ struct draw_context *draw_create( void )
ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
draw->nr_planes = 6;
- /* Statically allocate maximum sized vertices for the cache - could be cleverer...
- */
- {
- uint i;
- const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f;
- char *tmp = align_malloc(Elements(draw->vs.queue) * size, 16);
- if (!tmp)
- goto fail;
-
- for (i = 0; i < Elements(draw->vs.queue); i++)
- draw->vs.queue[i].vertex = (struct vertex_header *)(tmp + i * size);
- }
- draw->shader_queue_flush = draw_vertex_shader_queue_flush;
+ draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
- /* these defaults are oriented toward the needs of softpipe */
- draw->wide_point_threshold = 1000000.0; /* infinity */
- draw->wide_line_threshold = 1.0;
- draw->line_stipple = TRUE;
- draw->point_sprite = TRUE;
+ tgsi_exec_machine_init(&draw->machine);
- draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
+ /* FIXME: give this machine thing a proper constructor:
+ */
+ draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+ draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
- draw_vertex_cache_invalidate( draw );
- draw_set_mapped_element_buffer( draw, 0, NULL );
+ if (!draw_pipeline_init( draw ))
+ goto fail;
if (!draw_pt_init( draw ))
goto fail;
@@ -125,39 +82,14 @@ void draw_destroy( struct draw_context *draw )
if (!draw)
return;
- if (draw->pipeline.wide_line)
- draw->pipeline.wide_line->destroy( draw->pipeline.wide_line );
- if (draw->pipeline.wide_point)
- draw->pipeline.wide_point->destroy( draw->pipeline.wide_point );
- if (draw->pipeline.stipple)
- draw->pipeline.stipple->destroy( draw->pipeline.stipple );
- if (draw->pipeline.unfilled)
- draw->pipeline.unfilled->destroy( draw->pipeline.unfilled );
- if (draw->pipeline.twoside)
- draw->pipeline.twoside->destroy( draw->pipeline.twoside );
- if (draw->pipeline.offset)
- draw->pipeline.offset->destroy( draw->pipeline.offset );
- if (draw->pipeline.clip)
- draw->pipeline.clip->destroy( draw->pipeline.clip );
- if (draw->pipeline.flatshade)
- draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
- if (draw->pipeline.cull)
- draw->pipeline.cull->destroy( draw->pipeline.cull );
- if (draw->pipeline.validate)
- draw->pipeline.validate->destroy( draw->pipeline.validate );
- if (draw->pipeline.aaline)
- draw->pipeline.aaline->destroy( draw->pipeline.aaline );
- if (draw->pipeline.aapoint)
- draw->pipeline.aapoint->destroy( draw->pipeline.aapoint );
- if (draw->pipeline.pstipple)
- draw->pipeline.pstipple->destroy( draw->pipeline.pstipple );
- if (draw->pipeline.rasterize)
- draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
+
+ if (draw->machine.Inputs)
+ align_free(draw->machine.Inputs);
+
+ if (draw->machine.Outputs)
+ align_free(draw->machine.Outputs);
tgsi_exec_machine_free_data(&draw->machine);
-
- if (draw->vs.queue[0].vertex)
- align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */
/* Not so fast -- we're just borrowing this at the moment.
*
@@ -165,6 +97,7 @@ void draw_destroy( struct draw_context *draw )
draw->render->destroy( draw->render );
*/
+ draw_pipeline_destroy( draw );
draw_pt_destroy( draw );
FREE( draw );
@@ -189,6 +122,20 @@ void draw_set_rasterizer_state( struct draw_context *draw,
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
draw->rasterizer = raster;
+ draw->bypass_clipping =
+ ((draw->rasterizer && draw->rasterizer->bypass_clipping) ||
+ draw->driver.bypass_clipping);
+}
+
+
+void draw_set_driver_clipping( struct draw_context *draw,
+ boolean bypass_clipping )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ draw->driver.bypass_clipping = bypass_clipping;
+ draw->bypass_clipping = (draw->rasterizer->bypass_clipping ||
+ draw->driver.bypass_clipping);
}
@@ -247,9 +194,8 @@ draw_set_vertex_buffers(struct draw_context *draw,
{
assert(count <= PIPE_MAX_ATTRIBS);
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
-
- memcpy(draw->vertex_buffer, buffers, count * sizeof(buffers[0]));
+ memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0]));
+ draw->pt.nr_vertex_buffers = count;
}
@@ -260,9 +206,8 @@ draw_set_vertex_elements(struct draw_context *draw,
{
assert(count <= PIPE_MAX_ATTRIBS);
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
-
- memcpy(draw->vertex_element, elements, count * sizeof(elements[0]));
+ memcpy(draw->pt.vertex_element, elements, count * sizeof(elements[0]));
+ draw->pt.nr_vertex_elements = count;
}
@@ -273,8 +218,7 @@ void
draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer)
{
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
- draw->user.vbuffer[attr] = buffer;
+ draw->pt.user.vbuffer[attr] = buffer;
}
@@ -282,8 +226,7 @@ void
draw_set_mapped_constant_buffer(struct draw_context *draw,
const void *buffer)
{
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
- draw->user.constants = buffer;
+ draw->pt.user.constants = buffer;
}
@@ -295,7 +238,7 @@ void
draw_wide_point_threshold(struct draw_context *draw, float threshold)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->wide_point_threshold = threshold;
+ draw->pipeline.wide_point_threshold = threshold;
}
@@ -307,7 +250,7 @@ void
draw_wide_line_threshold(struct draw_context *draw, float threshold)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->wide_line_threshold = threshold;
+ draw->pipeline.wide_line_threshold = threshold;
}
@@ -318,7 +261,7 @@ void
draw_enable_line_stipple(struct draw_context *draw, boolean enable)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->line_stipple = enable;
+ draw->pipeline.line_stipple = enable;
}
@@ -329,7 +272,7 @@ void
draw_enable_point_sprites(struct draw_context *draw, boolean enable)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->point_sprite = enable;
+ draw->pipeline.point_sprite = enable;
}
@@ -384,79 +327,56 @@ draw_num_vs_outputs(struct draw_context *draw)
}
-/**
- * Allocate space for temporary post-transform vertices, such as for clipping.
- */
-void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr )
-{
- assert(!stage->tmp);
-
- stage->nr_tmps = nr;
-
- if (nr) {
- ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr );
- unsigned i;
-
- stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr );
-
- for (i = 0; i < nr; i++)
- stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE);
- }
-}
-
-void draw_free_temp_verts( struct draw_stage *stage )
+void draw_set_render( struct draw_context *draw,
+ struct vbuf_render *render )
{
- if (stage->tmp) {
- FREE( stage->tmp[0] );
- FREE( stage->tmp );
- stage->tmp = NULL;
- }
+ draw->render = render;
}
-
-boolean draw_use_sse(struct draw_context *draw)
+void draw_set_edgeflags( struct draw_context *draw,
+ const unsigned *edgeflag )
{
- return (boolean) draw->use_sse;
+ draw->pt.user.edgeflag = edgeflag;
}
-void draw_reset_vertex_ids(struct draw_context *draw)
-{
- struct draw_stage *stage = draw->pipeline.first;
-
- while (stage) {
- unsigned i;
-
- for (i = 0; i < stage->nr_tmps; i++)
- stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID;
- stage = stage->next;
- }
- draw_vertex_cache_reset_vertex_ids(draw); /* going away soon */
- draw_pt_reset_vertex_ids(draw);
+/**
+ * Tell the drawing context about the index/element buffer to use
+ * (ala glDrawElements)
+ * If no element buffer is to be used (i.e. glDrawArrays) then this
+ * should be called with eltSize=0 and elements=NULL.
+ *
+ * \param draw the drawing context
+ * \param eltSize size of each element (1, 2 or 4 bytes)
+ * \param elements the element buffer ptr
+ */
+void
+draw_set_mapped_element_buffer( struct draw_context *draw,
+ unsigned eltSize, void *elements )
+{
+ draw->pt.user.elts = elements;
+ draw->pt.user.eltSize = eltSize;
}
-void draw_set_render( struct draw_context *draw,
- struct vbuf_render *render )
+
+/* Revamp me please:
+ */
+void draw_do_flush( struct draw_context *draw, unsigned flags )
{
- draw->render = render;
-}
+ if (!draw->suspend_flushing)
+ {
+ assert(!draw->flushing); /* catch inadvertant recursion */
-void draw_set_edgeflags( struct draw_context *draw,
- const unsigned *edgeflag )
-{
- draw->user.edgeflag = edgeflag;
-}
+ draw->flushing = TRUE;
+ draw_pipeline_flush( draw, flags );
-boolean draw_get_edgeflag( struct draw_context *draw,
- unsigned idx )
-{
- if (draw->user.edgeflag)
- return (draw->user.edgeflag[idx/32] & (1 << (idx%32))) != 0;
- else
- return 1;
+ draw->reduced_prim = ~0; /* is reduced_prim needed any more? */
+
+ draw->flushing = FALSE;
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index c7ac32b452..c5c3d3b09e 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -42,37 +42,11 @@
struct pipe_context;
-struct vertex_buffer;
-struct vertex_info;
struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
-/**
- * Clipmask flags
- */
-/*@{*/
-#define CLIP_RIGHT_BIT 0x01
-#define CLIP_LEFT_BIT 0x02
-#define CLIP_TOP_BIT 0x04
-#define CLIP_BOTTOM_BIT 0x08
-#define CLIP_NEAR_BIT 0x10
-#define CLIP_FAR_BIT 0x20
-/*@}*/
-
-/**
- * Bitshift for each clip flag
- */
-/*@{*/
-#define CLIP_RIGHT_SHIFT 0
-#define CLIP_LEFT_SHIFT 1
-#define CLIP_TOP_SHIFT 2
-#define CLIP_BOTTOM_SHIFT 3
-#define CLIP_NEAR_SHIFT 4
-#define CLIP_FAR_SHIFT 5
-/*@}*/
-
struct draw_context *draw_create( void );
@@ -99,15 +73,13 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable);
void draw_enable_point_sprites(struct draw_context *draw, boolean enable);
-boolean draw_use_sse(struct draw_context *draw);
-
-void
+boolean
draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe);
-void
+boolean
draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe);
-void
+boolean
draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe);
@@ -168,17 +140,24 @@ void draw_arrays(struct draw_context *draw, unsigned prim,
void draw_flush(struct draw_context *draw);
-/***********************************************************************
- * draw_debug.c
+
+/*******************************************************************************
+ * Driver backend interface
*/
-boolean draw_validate_prim( unsigned prim, unsigned length );
-unsigned draw_trim_prim( unsigned mode, unsigned count );
+struct vbuf_render;
+void draw_set_render( struct draw_context *draw,
+ struct vbuf_render *render );
+void draw_set_driver_clipping( struct draw_context *draw,
+ boolean bypass_clipping );
+/*******************************************************************************
+ * Draw pipeline
+ */
+boolean draw_need_pipeline(const struct draw_context *draw,
+ const struct pipe_rasterizer_state *rasterizer,
+ unsigned prim );
-struct vbuf_render;
-void draw_set_render( struct draw_context *draw,
- struct vbuf_render *render );
#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
new file mode 100644
index 0000000000..46afb0f41f
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -0,0 +1,222 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pipe.h"
+
+
+
+boolean draw_pipeline_init( struct draw_context *draw )
+{
+ /* create pipeline stages */
+ draw->pipeline.wide_line = draw_wide_line_stage( draw );
+ draw->pipeline.wide_point = draw_wide_point_stage( draw );
+ draw->pipeline.stipple = draw_stipple_stage( draw );
+ draw->pipeline.unfilled = draw_unfilled_stage( draw );
+ draw->pipeline.twoside = draw_twoside_stage( draw );
+ draw->pipeline.offset = draw_offset_stage( draw );
+ draw->pipeline.clip = draw_clip_stage( draw );
+ draw->pipeline.flatshade = draw_flatshade_stage( draw );
+ draw->pipeline.cull = draw_cull_stage( draw );
+ draw->pipeline.validate = draw_validate_stage( draw );
+ draw->pipeline.first = draw->pipeline.validate;
+
+ if (!draw->pipeline.wide_line ||
+ !draw->pipeline.wide_point ||
+ !draw->pipeline.stipple ||
+ !draw->pipeline.unfilled ||
+ !draw->pipeline.twoside ||
+ !draw->pipeline.offset ||
+ !draw->pipeline.clip ||
+ !draw->pipeline.flatshade ||
+ !draw->pipeline.cull ||
+ !draw->pipeline.validate)
+ return FALSE;
+
+ /* these defaults are oriented toward the needs of softpipe */
+ draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */
+ draw->pipeline.wide_line_threshold = 1.0;
+ draw->pipeline.line_stipple = TRUE;
+ draw->pipeline.point_sprite = TRUE;
+
+ return TRUE;
+}
+
+
+void draw_pipeline_destroy( struct draw_context *draw )
+{
+ if (draw->pipeline.wide_line)
+ draw->pipeline.wide_line->destroy( draw->pipeline.wide_line );
+ if (draw->pipeline.wide_point)
+ draw->pipeline.wide_point->destroy( draw->pipeline.wide_point );
+ if (draw->pipeline.stipple)
+ draw->pipeline.stipple->destroy( draw->pipeline.stipple );
+ if (draw->pipeline.unfilled)
+ draw->pipeline.unfilled->destroy( draw->pipeline.unfilled );
+ if (draw->pipeline.twoside)
+ draw->pipeline.twoside->destroy( draw->pipeline.twoside );
+ if (draw->pipeline.offset)
+ draw->pipeline.offset->destroy( draw->pipeline.offset );
+ if (draw->pipeline.clip)
+ draw->pipeline.clip->destroy( draw->pipeline.clip );
+ if (draw->pipeline.flatshade)
+ draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
+ if (draw->pipeline.cull)
+ draw->pipeline.cull->destroy( draw->pipeline.cull );
+ if (draw->pipeline.validate)
+ draw->pipeline.validate->destroy( draw->pipeline.validate );
+ if (draw->pipeline.aaline)
+ draw->pipeline.aaline->destroy( draw->pipeline.aaline );
+ if (draw->pipeline.aapoint)
+ draw->pipeline.aapoint->destroy( draw->pipeline.aapoint );
+ if (draw->pipeline.pstipple)
+ draw->pipeline.pstipple->destroy( draw->pipeline.pstipple );
+ if (draw->pipeline.rasterize)
+ draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
+}
+
+
+
+
+
+
+
+static void do_point( struct draw_context *draw,
+ const char *v0 )
+{
+ struct prim_header prim;
+
+ prim.flags = 0;
+ prim.pad = 0;
+ prim.v[0] = (struct vertex_header *)v0;
+
+ draw->pipeline.first->point( draw->pipeline.first, &prim );
+}
+
+
+static void do_line( struct draw_context *draw,
+ ushort flags,
+ const char *v0,
+ const char *v1 )
+{
+ struct prim_header prim;
+
+ prim.flags = flags;
+ prim.pad = 0;
+ prim.v[0] = (struct vertex_header *)v0;
+ prim.v[1] = (struct vertex_header *)v1;
+
+ draw->pipeline.first->line( draw->pipeline.first, &prim );
+}
+
+
+static void do_triangle( struct draw_context *draw,
+ ushort flags,
+ char *v0,
+ char *v1,
+ char *v2 )
+{
+ struct prim_header prim;
+
+ prim.v[0] = (struct vertex_header *)v0;
+ prim.v[1] = (struct vertex_header *)v1;
+ prim.v[2] = (struct vertex_header *)v2;
+ prim.flags = flags;
+ prim.pad = 0;
+
+ draw->pipeline.first->tri( draw->pipeline.first, &prim );
+}
+
+
+
+
+/* Code to run the pipeline on a fairly arbitary collection of vertices.
+ *
+ * Vertex headers must be pre-initialized with the
+ * UNDEFINED_VERTEX_ID, this code will cause that id to become
+ * overwritten, so it may have to be reset if there is the intention
+ * to reuse the vertices.
+ *
+ * This code provides a callback to reset the vertex id's which the
+ * draw_vbuf.c code uses when it has to perform a flush.
+ */
+void draw_pipeline_run( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count )
+{
+ char *verts = (char *)vertices;
+ unsigned i;
+
+ draw->pipeline.verts = verts;
+ draw->pipeline.vertex_stride = stride;
+ draw->pipeline.vertex_count = vertex_count;
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i++)
+ do_point( draw,
+ verts + stride * elts[i] );
+ break;
+ case PIPE_PRIM_LINES:
+ for (i = 0; i+1 < count; i += 2)
+ do_line( draw,
+ elts[i+0], /* flags */
+ verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK),
+ verts + stride * elts[i+1]);
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 0; i+2 < count; i += 3)
+ do_triangle( draw,
+ elts[i+0], /* flags */
+ verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK),
+ verts + stride * elts[i+1],
+ verts + stride * elts[i+2]);
+ break;
+ }
+
+ draw->pipeline.verts = NULL;
+ draw->pipeline.vertex_count = 0;
+}
+
+
+
+void draw_pipeline_flush( struct draw_context *draw,
+ unsigned flags )
+{
+ draw->pipeline.first->flush( draw->pipeline.first, flags );
+ draw->pipeline.first = draw->pipeline.validate;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h
new file mode 100644
index 0000000000..f1cb0891ca
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -0,0 +1,125 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef DRAW_PIPE_H
+#define DRAW_PIPE_H
+
+#include "pipe/p_compiler.h"
+#include "draw_private.h" /* for sizeof(vertex_header) */
+
+
+/**
+ * Basic info for a point/line/triangle primitive.
+ */
+struct prim_header {
+ float det; /**< front/back face determinant */
+ ushort flags;
+ ushort pad;
+ struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */
+};
+
+
+
+/**
+ * Base class for all primitive drawing stages.
+ */
+struct draw_stage
+{
+ struct draw_context *draw; /**< parent context */
+
+ struct draw_stage *next; /**< next stage in pipeline */
+
+ struct vertex_header **tmp; /**< temp vert storage, such as for clipping */
+ unsigned nr_tmps;
+
+ void (*point)( struct draw_stage *,
+ struct prim_header * );
+
+ void (*line)( struct draw_stage *,
+ struct prim_header * );
+
+ void (*tri)( struct draw_stage *,
+ struct prim_header * );
+
+ void (*flush)( struct draw_stage *,
+ unsigned flags );
+
+ void (*reset_stipple_counter)( struct draw_stage * );
+
+ void (*destroy)( struct draw_stage * );
+};
+
+
+extern struct draw_stage *draw_unfilled_stage( struct draw_context *context );
+extern struct draw_stage *draw_twoside_stage( struct draw_context *context );
+extern struct draw_stage *draw_offset_stage( struct draw_context *context );
+extern struct draw_stage *draw_clip_stage( struct draw_context *context );
+extern struct draw_stage *draw_flatshade_stage( struct draw_context *context );
+extern struct draw_stage *draw_cull_stage( struct draw_context *context );
+extern struct draw_stage *draw_stipple_stage( struct draw_context *context );
+extern struct draw_stage *draw_wide_line_stage( struct draw_context *context );
+extern struct draw_stage *draw_wide_point_stage( struct draw_context *context );
+extern struct draw_stage *draw_validate_stage( struct draw_context *context );
+
+
+extern void draw_free_temp_verts( struct draw_stage *stage );
+extern boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr );
+
+extern void draw_reset_vertex_ids( struct draw_context *draw );
+
+void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header);
+void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header);
+void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header);
+
+
+
+/**
+ * Get a writeable copy of a vertex.
+ * \param stage drawing stage info
+ * \param vert the vertex to copy (source)
+ * \param idx index into stage's tmp[] array to put the copy (dest)
+ * \return pointer to the copied vertex
+ */
+static INLINE struct vertex_header *
+dup_vert( struct draw_stage *stage,
+ const struct vertex_header *vert,
+ unsigned idx )
+{
+ struct vertex_header *tmp = stage->tmp[idx];
+ const uint vsize = sizeof(struct vertex_header)
+ + stage->draw->num_vs_outputs * 4 * sizeof(float);
+ memcpy(tmp, vert, vsize);
+ tmp->vertex_id = UNDEFINED_VERTEX_ID;
+ return tmp;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index e8d2a45102..346a96e37f 100644
--- a/src/gallium/auxiliary/draw/draw_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -43,6 +43,7 @@
#include "draw_context.h"
#include "draw_private.h"
+#include "draw_pipe.h"
/**
@@ -333,11 +334,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
* Generate the frag shader we'll use for drawing AA lines.
* This will be the user's shader plus some texture/modulate instructions.
*/
-static void
+static boolean
generate_aaline_fs(struct aaline_stage *aaline)
{
const struct pipe_shader_state *orig_fs = &aaline->fs->state;
- //struct draw_context *draw = aaline->stage.draw;
struct pipe_shader_state aaline_fs;
struct aa_transform_context transform;
@@ -345,6 +345,8 @@ generate_aaline_fs(struct aaline_stage *aaline)
aaline_fs = *orig_fs; /* copy to init */
aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ if (aaline_fs.tokens == NULL)
+ return FALSE;
memset(&transform, 0, sizeof(transform));
transform.colorOutput = -1;
@@ -369,15 +371,18 @@ generate_aaline_fs(struct aaline_stage *aaline)
aaline->fs->aaline_fs
= aaline->driver_create_fs_state(aaline->pipe, &aaline_fs);
+ if (aaline->fs->aaline_fs == NULL)
+ return FALSE;
aaline->fs->generic_attrib = transform.maxGeneric + 1;
+ return TRUE;
}
/**
* Create the texture map we'll use for antialiasing the lines.
*/
-static void
+static boolean
aaline_create_texture(struct aaline_stage *aaline)
{
struct pipe_context *pipe = aaline->pipe;
@@ -387,7 +392,7 @@ aaline_create_texture(struct aaline_stage *aaline)
memset(&texTemp, 0, sizeof(texTemp));
texTemp.target = PIPE_TEXTURE_2D;
- texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */
+ texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
texTemp.last_level = MAX_TEXTURE_LEVEL;
texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL;
texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL;
@@ -395,6 +400,8 @@ aaline_create_texture(struct aaline_stage *aaline)
texTemp.cpp = 1;
aaline->texture = screen->texture_create(screen, &texTemp);
+ if (!aaline->texture)
+ return FALSE;
/* Fill in mipmap images.
* Basically each level is solid opaque, except for the outermost
@@ -410,6 +417,8 @@ aaline_create_texture(struct aaline_stage *aaline)
surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0);
data = pipe_surface_map(surface);
+ if (data == NULL)
+ return FALSE;
for (i = 0; i < size; i++) {
for (j = 0; j < size; j++) {
@@ -435,6 +444,7 @@ aaline_create_texture(struct aaline_stage *aaline)
pipe_surface_reference(&surface, NULL);
pipe->texture_update(pipe, aaline->texture, 0, (1 << level));
}
+ return TRUE;
}
@@ -443,7 +453,7 @@ aaline_create_texture(struct aaline_stage *aaline)
* By using a mipmapped texture, we don't have to generate a different
* texture image for each line size.
*/
-static void
+static boolean
aaline_create_sampler(struct aaline_stage *aaline)
{
struct pipe_sampler_state sampler;
@@ -461,6 +471,10 @@ aaline_create_sampler(struct aaline_stage *aaline)
sampler.max_lod = MAX_TEXTURE_LEVEL;
aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
+ if (aaline->sampler_cso == NULL)
+ return FALSE;
+
+ return TRUE;
}
@@ -468,13 +482,20 @@ aaline_create_sampler(struct aaline_stage *aaline)
* When we're about to draw our first AA line in a batch, this function is
* called to tell the driver to bind our modified fragment shader.
*/
-static void
+static boolean
bind_aaline_fragment_shader(struct aaline_stage *aaline)
{
- if (!aaline->fs->aaline_fs) {
- generate_aaline_fs(aaline);
- }
+ struct draw_context *draw = aaline->stage.draw;
+
+ if (!aaline->fs->aaline_fs &&
+ !generate_aaline_fs(aaline))
+ return FALSE;
+
+ draw->suspend_flushing = TRUE;
aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs);
+ draw->suspend_flushing = FALSE;
+
+ return TRUE;
}
@@ -486,20 +507,6 @@ aaline_stage( struct draw_stage *stage )
}
-static void
-passthrough_point(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->point(stage->next, header);
-}
-
-
-static void
-passthrough_tri(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->tri(stage->next, header);
-}
-
-
/**
* Draw a wide line by drawing a quad, using geometry which will
* fullfill GL's antialiased line requirements.
@@ -637,7 +644,11 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
/*
* Bind (generate) our fragprog, sampler and texture
*/
- bind_aaline_fragment_shader(aaline);
+ if (!bind_aaline_fragment_shader(aaline)) {
+ stage->line = draw_pipe_passthrough_line;
+ stage->line(stage, header);
+ return;
+ }
/* update vertex attrib info */
aaline->tex_slot = draw->num_vs_outputs;
@@ -657,8 +668,10 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit],
aaline->texture);
+ draw->suspend_flushing = TRUE;
aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler);
aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture);
+ draw->suspend_flushing = FALSE;
/* now really draw first line */
stage->line = aaline_line;
@@ -676,14 +689,14 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
stage->line = aaline_first_line;
stage->next->flush( stage->next, flags );
- /* restore original frag shader */
+ /* restore original frag shader, texture, sampler state */
+ draw->suspend_flushing = TRUE;
aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs);
-
- /* XXX restore original texture, sampler state */
aaline->driver_bind_sampler_states(pipe, aaline->num_samplers,
aaline->state.sampler);
aaline->driver_set_sampler_textures(pipe, aaline->num_textures,
aaline->state.texture);
+ draw->suspend_flushing = FALSE;
draw->extra_vp_outputs.slot = 0;
}
@@ -700,10 +713,17 @@ static void
aaline_destroy(struct draw_stage *stage)
{
struct aaline_stage *aaline = aaline_stage(stage);
+ uint i;
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_texture_reference(&aaline->state.texture[i], NULL);
+ }
- aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso);
+ if (aaline->sampler_cso)
+ aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso);
- pipe_texture_release(&aaline->texture);
+ if (aaline->texture)
+ pipe_texture_release(&aaline->texture);
draw_free_temp_verts( stage );
@@ -715,19 +735,28 @@ static struct aaline_stage *
draw_aaline_stage(struct draw_context *draw)
{
struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage);
+ if (aaline == NULL)
+ return NULL;
- draw_alloc_temp_verts( &aaline->stage, 8 );
+ if (!draw_alloc_temp_verts( &aaline->stage, 8 ))
+ goto fail;
aaline->stage.draw = draw;
aaline->stage.next = NULL;
- aaline->stage.point = passthrough_point;
+ aaline->stage.point = draw_pipe_passthrough_point;
aaline->stage.line = aaline_first_line;
- aaline->stage.tri = passthrough_tri;
+ aaline->stage.tri = draw_pipe_passthrough_tri;
aaline->stage.flush = aaline_flush;
aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter;
aaline->stage.destroy = aaline_destroy;
return aaline;
+
+ fail:
+ if (aaline)
+ aaline_destroy(&aaline->stage);
+
+ return NULL;
}
@@ -749,13 +778,13 @@ aaline_create_fs_state(struct pipe_context *pipe,
{
struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader);
+ if (aafs == NULL)
+ return NULL;
- if (aafs) {
- aafs->state = *fs;
+ aafs->state = *fs;
- /* pass-through */
- aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs);
- }
+ /* pass-through */
+ aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs);
return aafs;
}
@@ -766,6 +795,7 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;
+
/* save current */
aaline->fs = aafs;
/* pass-through */
@@ -790,9 +820,11 @@ aaline_bind_sampler_states(struct pipe_context *pipe,
unsigned num, void **sampler)
{
struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+
/* save current */
memcpy(aaline->state.sampler, sampler, num * sizeof(void *));
aaline->num_samplers = num;
+
/* pass-through */
aaline->driver_bind_sampler_states(aaline->pipe, num, sampler);
}
@@ -809,6 +841,9 @@ aaline_set_sampler_textures(struct pipe_context *pipe,
for (i = 0; i < num; i++) {
pipe_texture_reference(&aaline->state.texture[i], texture[i]);
}
+ for ( ; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_texture_reference(&aaline->state.texture[i], NULL);
+ }
aaline->num_textures = num;
/* pass-through */
@@ -821,7 +856,7 @@ aaline_set_sampler_textures(struct pipe_context *pipe,
* into the draw module's pipeline. This will not be used if the
* hardware has native support for AA lines.
*/
-void
+boolean
draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
{
struct aaline_stage *aaline;
@@ -832,14 +867,17 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
* Create / install AA line drawing / prim stage
*/
aaline = draw_aaline_stage( draw );
- assert(aaline);
- draw->pipeline.aaline = &aaline->stage;
+ if (!aaline)
+ goto fail;
aaline->pipe = pipe;
/* create special texture, sampler state */
- aaline_create_texture(aaline);
- aaline_create_sampler(aaline);
+ if (!aaline_create_texture(aaline))
+ goto fail;
+
+ if (!aaline_create_sampler(aaline))
+ goto fail;
/* save original driver functions */
aaline->driver_create_fs_state = pipe->create_fs_state;
@@ -856,4 +894,16 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
pipe->bind_sampler_states = aaline_bind_sampler_states;
pipe->set_sampler_textures = aaline_set_sampler_textures;
+
+ /* Install once everything is known to be OK:
+ */
+ draw->pipeline.aaline = &aaline->stage;
+
+ return TRUE;
+
+ fail:
+ if (aaline)
+ aaline->stage.destroy( &aaline->stage );
+
+ return FALSE;
}
diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index fcebe3e7a0..122a48660a 100644
--- a/src/gallium/auxiliary/draw/draw_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -48,7 +48,8 @@
#include "tgsi/util/tgsi_dump.h"
#include "draw_context.h"
-#include "draw_private.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
/*
@@ -480,10 +481,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
/**
- * Generate the frag shader we'll use for drawing AA lines.
+ * Generate the frag shader we'll use for drawing AA points.
* This will be the user's shader plus some texture/modulate instructions.
*/
-static void
+static boolean
generate_aapoint_fs(struct aapoint_stage *aapoint)
{
const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
@@ -494,6 +495,8 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
aapoint_fs = *orig_fs; /* copy to init */
aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ if (aapoint_fs.tokens == NULL)
+ return FALSE;
memset(&transform, 0, sizeof(transform));
transform.colorOutput = -1;
@@ -518,22 +521,33 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
aapoint->fs->aapoint_fs
= aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
+ if (aapoint->fs->aapoint_fs == NULL)
+ return FALSE;
aapoint->fs->generic_attrib = transform.maxGeneric + 1;
+
+ return TRUE;
}
/**
- * When we're about to draw our first AA line in a batch, this function is
+ * When we're about to draw our first AA point in a batch, this function is
* called to tell the driver to bind our modified fragment shader.
*/
-static void
+static boolean
bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
{
- if (!aapoint->fs->aapoint_fs) {
- generate_aapoint_fs(aapoint);
- }
+ struct draw_context *draw = aapoint->stage.draw;
+
+ if (!aapoint->fs->aapoint_fs &&
+ !generate_aapoint_fs(aapoint))
+ return FALSE;
+
+ draw->suspend_flushing = TRUE;
aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
+ draw->suspend_flushing = FALSE;
+
+ return TRUE;
}
@@ -545,18 +559,6 @@ aapoint_stage( struct draw_stage *stage )
}
-static void
-passthrough_line(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->line(stage->next, header);
-}
-
-
-static void
-passthrough_tri(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->tri(stage->next, header);
-}
/**
@@ -700,7 +702,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
}
}
- /* now really draw first line */
+ /* now really draw first point */
stage->point = aapoint_point;
stage->point(stage, header);
}
@@ -717,7 +719,9 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)
stage->next->flush( stage->next, flags );
/* restore original frag shader */
+ draw->suspend_flushing = TRUE;
aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
+ draw->suspend_flushing = FALSE;
draw->extra_vp_outputs.slot = 0;
}
@@ -742,19 +746,29 @@ static struct aapoint_stage *
draw_aapoint_stage(struct draw_context *draw)
{
struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
+ if (aapoint == NULL)
+ goto fail;
- draw_alloc_temp_verts( &aapoint->stage, 4 );
+ if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
+ goto fail;
aapoint->stage.draw = draw;
aapoint->stage.next = NULL;
aapoint->stage.point = aapoint_first_point;
- aapoint->stage.line = passthrough_line;
- aapoint->stage.tri = passthrough_tri;
+ aapoint->stage.line = draw_pipe_passthrough_line;
+ aapoint->stage.tri = draw_pipe_passthrough_tri;
aapoint->stage.flush = aapoint_flush;
aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
aapoint->stage.destroy = aapoint_destroy;
return aapoint;
+
+ fail:
+ if (aapoint)
+ aapoint_destroy(&aapoint->stage);
+
+ return NULL;
+
}
@@ -776,13 +790,13 @@ aapoint_create_fs_state(struct pipe_context *pipe,
{
struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
+ if (aafs == NULL)
+ return NULL;
- if (aafs) {
- aafs->state = *fs;
+ aafs->state = *fs;
- /* pass-through */
- aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
- }
+ /* pass-through */
+ aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
return aafs;
}
@@ -817,7 +831,7 @@ aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
* into the draw module's pipeline. This will not be used if the
* hardware has native support for AA points.
*/
-void
+boolean
draw_install_aapoint_stage(struct draw_context *draw,
struct pipe_context *pipe)
{
@@ -829,8 +843,8 @@ draw_install_aapoint_stage(struct draw_context *draw,
* Create / install AA point drawing / prim stage
*/
aapoint = draw_aapoint_stage( draw );
- assert(aapoint);
- draw->pipeline.aapoint = &aapoint->stage;
+ if (aapoint == NULL)
+ goto fail;
aapoint->pipe = pipe;
@@ -843,4 +857,14 @@ draw_install_aapoint_stage(struct draw_context *draw,
pipe->create_fs_state = aapoint_create_fs_state;
pipe->bind_fs_state = aapoint_bind_fs_state;
pipe->delete_fs_state = aapoint_delete_fs_state;
+
+ draw->pipeline.aapoint = &aapoint->stage;
+
+ return TRUE;
+
+ fail:
+ if (aapoint)
+ aapoint->stage.destroy( &aapoint->stage );
+
+ return FALSE;
}
diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index e24c5d8032..ce80c94163 100644
--- a/src/gallium/auxiliary/draw/draw_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -35,8 +35,8 @@
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_context.h"
-#include "draw_private.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
#ifndef IS_NEGATIVE
@@ -119,7 +119,7 @@ static void interp( const struct clipper *clip,
*/
{
dst->clipmask = 0;
- dst->edgeflag = 0;
+ dst->edgeflag = 0; /* will get overwritten later */
dst->pad = 0;
dst->vertex_id = UNDEFINED_VERTEX_ID;
}
@@ -162,49 +162,50 @@ static void emit_poly( struct draw_stage *stage,
struct prim_header header;
unsigned i;
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
/* later stages may need the determinant, but only the sign matters */
header.det = origPrim->det;
+ header.flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+ header.pad = 0;
- for (i = 2; i < n; i++) {
+ for (i = 2; i < n; i++, header.flags = 0) {
header.v[0] = inlist[i-1];
header.v[1] = inlist[i];
header.v[2] = inlist[0]; /* keep in v[2] for flatshading */
-
- {
- unsigned tmp1 = header.v[1]->edgeflag;
- unsigned tmp2 = header.v[2]->edgeflag;
-
- if (i != n-1) header.v[1]->edgeflag = 0;
- if (i != 2) header.v[2]->edgeflag = 0;
-
- header.edgeflags = ((header.v[0]->edgeflag << 0) |
- (header.v[1]->edgeflag << 1) |
- (header.v[2]->edgeflag << 2));
-
- if (0) {
- const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
- uint j, k;
- debug_printf("Clipped tri:\n");
- for (j = 0; j < 3; j++) {
- for (k = 0; k < vs->info.num_outputs; k++) {
- debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k,
- header.v[j]->data[k][0],
- header.v[j]->data[k][1],
- header.v[j]->data[k][2],
- header.v[j]->data[k][3]);
- }
+
+ if (i == n-1)
+ header.flags |= edge_last;
+
+ if (0) {
+ const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
+ uint j, k;
+ debug_printf("Clipped tri:\n");
+ for (j = 0; j < 3; j++) {
+ for (k = 0; k < vs->info.num_outputs; k++) {
+ debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k,
+ header.v[j]->data[k][0],
+ header.v[j]->data[k][1],
+ header.v[j]->data[k][2],
+ header.v[j]->data[k][3]);
}
}
-
- stage->next->tri( stage->next, &header );
-
- header.v[1]->edgeflag = tmp1;
- header.v[2]->edgeflag = tmp2;
}
+
+ stage->next->tri( stage->next, &header );
}
}
-
+static INLINE float
+dot4(const float *a, const float *b)
+{
+ return (a[0]*b[0] +
+ a[1]*b[1] +
+ a[2]*b[2] +
+ a[3]*b[3]);
+}
/* Clip a triangle against the viewport and user clip planes.
@@ -486,8 +487,11 @@ static void clip_destroy( struct draw_stage *stage )
struct draw_stage *draw_clip_stage( struct draw_context *draw )
{
struct clipper *clipper = CALLOC_STRUCT(clipper);
+ if (clipper == NULL)
+ goto fail;
- draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 );
+ if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ))
+ goto fail;
clipper->stage.draw = draw;
clipper->stage.point = clip_point;
@@ -500,4 +504,10 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw )
clipper->plane = draw->plane;
return &clipper->stage;
+
+ fail:
+ if (clipper)
+ clipper->stage.destroy( &clipper->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index 8177b0ac86..87aaf1f85b 100644
--- a/src/gallium/auxiliary/draw/draw_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -35,7 +35,7 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
-#include "draw_private.h"
+#include "draw_pipe.h"
struct cull_stage {
@@ -95,20 +95,6 @@ static void cull_first_tri( struct draw_stage *stage,
-static void cull_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line( stage->next, header );
-}
-
-
-static void cull_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-
static void cull_flush( struct draw_stage *stage, unsigned flags )
{
stage->tri = cull_first_tri;
@@ -134,17 +120,26 @@ static void cull_destroy( struct draw_stage *stage )
struct draw_stage *draw_cull_stage( struct draw_context *draw )
{
struct cull_stage *cull = CALLOC_STRUCT(cull_stage);
+ if (cull == NULL)
+ goto fail;
- draw_alloc_temp_verts( &cull->stage, 0 );
+ if (!draw_alloc_temp_verts( &cull->stage, 0 ))
+ goto fail;
cull->stage.draw = draw;
cull->stage.next = NULL;
- cull->stage.point = cull_point;
- cull->stage.line = cull_line;
+ cull->stage.point = draw_pipe_passthrough_point;
+ cull->stage.line = draw_pipe_passthrough_line;
cull->stage.tri = cull_first_tri;
cull->stage.flush = cull_flush;
cull->stage.reset_stipple_counter = cull_reset_stipple_counter;
cull->stage.destroy = cull_destroy;
return &cull->stage;
+
+ fail:
+ if (cull)
+ cull->stage.destroy( &cull->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index af2cb05c98..09b68c4559 100644
--- a/src/gallium/auxiliary/draw/draw_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -30,7 +30,8 @@
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
/** subclass of draw_stage */
@@ -90,7 +91,8 @@ static void flatshade_tri_0( struct draw_stage *stage,
struct prim_header tmp;
tmp.det = header->det;
- tmp.edgeflags = header->edgeflags;
+ tmp.flags = header->flags;
+ tmp.pad = header->pad;
tmp.v[0] = header->v[0];
tmp.v[1] = dup_vert(stage, header->v[1], 0);
tmp.v[2] = dup_vert(stage, header->v[2], 1);
@@ -107,7 +109,8 @@ static void flatshade_tri_2( struct draw_stage *stage,
struct prim_header tmp;
tmp.det = header->det;
- tmp.edgeflags = header->edgeflags;
+ tmp.flags = header->flags;
+ tmp.pad = header->pad;
tmp.v[0] = dup_vert(stage, header->v[0], 0);
tmp.v[1] = dup_vert(stage, header->v[1], 1);
tmp.v[2] = header->v[2];
@@ -151,13 +154,6 @@ static void flatshade_line_1( struct draw_stage *stage,
}
-/* Flatshade point -- passthrough.
- */
-static void flatshade_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
static void flatshade_init_state( struct draw_stage *stage )
@@ -230,12 +226,15 @@ static void flatshade_destroy( struct draw_stage *stage )
struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
{
struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage);
+ if (flatshade == NULL)
+ goto fail;
- draw_alloc_temp_verts( &flatshade->stage, 2 );
+ if (!draw_alloc_temp_verts( &flatshade->stage, 2 ))
+ goto fail;
flatshade->stage.draw = draw;
flatshade->stage.next = NULL;
- flatshade->stage.point = flatshade_point;
+ flatshade->stage.point = draw_pipe_passthrough_point;
flatshade->stage.line = flatshade_first_line;
flatshade->stage.tri = flatshade_first_tri;
flatshade->stage.flush = flatshade_flush;
@@ -243,6 +242,12 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
flatshade->stage.destroy = flatshade_destroy;
return &flatshade->stage;
+
+ fail:
+ if (flatshade)
+ flatshade->stage.destroy( &flatshade->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index dbc676deae..ea6de8c571 100644
--- a/src/gallium/auxiliary/draw/draw_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -33,7 +33,7 @@
*/
#include "pipe/p_util.h"
-#include "draw_private.h"
+#include "draw_pipe.h"
@@ -106,7 +106,8 @@ static void offset_tri( struct draw_stage *stage,
struct prim_header tmp;
tmp.det = header->det;
- tmp.edgeflags = header->edgeflags;
+ tmp.flags = header->flags;
+ tmp.pad = header->pad;
tmp.v[0] = dup_vert(stage, header->v[0], 0);
tmp.v[1] = dup_vert(stage, header->v[1], 1);
tmp.v[2] = dup_vert(stage, header->v[2], 2);
@@ -129,18 +130,6 @@ static void offset_first_tri( struct draw_stage *stage,
}
-static void offset_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line( stage->next, header );
-}
-
-
-static void offset_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
static void offset_flush( struct draw_stage *stage,
@@ -170,17 +159,25 @@ static void offset_destroy( struct draw_stage *stage )
struct draw_stage *draw_offset_stage( struct draw_context *draw )
{
struct offset_stage *offset = CALLOC_STRUCT(offset_stage);
+ if (offset == NULL)
+ goto fail;
draw_alloc_temp_verts( &offset->stage, 3 );
offset->stage.draw = draw;
offset->stage.next = NULL;
- offset->stage.point = offset_point;
- offset->stage.line = offset_line;
+ offset->stage.point = draw_pipe_passthrough_point;
+ offset->stage.line = draw_pipe_passthrough_line;
offset->stage.tri = offset_first_tri;
offset->stage.flush = offset_flush;
offset->stage.reset_stipple_counter = offset_reset_stipple_counter;
offset->stage.destroy = offset_destroy;
return &offset->stage;
+
+ fail:
+ if (offset)
+ offset->stage.destroy( &offset->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 4dddb72906..2dfd1800d5 100644
--- a/src/gallium/auxiliary/draw/draw_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -44,7 +44,7 @@
#include "tgsi/util/tgsi_dump.h"
#include "draw_context.h"
-#include "draw_private.h"
+#include "draw_pipe.h"
@@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
uint size = 4;
immed = tgsi_default_full_immediate();
immed.Immediate.Size = 1 + size; /* one for the token itself */
- immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value;
+ immed.u.Pointer = (void *) value;
ctx->emit_immediate(ctx, &immed);
}
@@ -320,7 +320,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
* Generate the frag shader we'll use for doing polygon stipple.
* This will be the user's shader prefixed with a TEX and KIL instruction.
*/
-static void
+static boolean
generate_pstip_fs(struct pstip_stage *pstip)
{
const struct pipe_shader_state *orig_fs = &pstip->fs->state;
@@ -332,6 +332,8 @@ generate_pstip_fs(struct pstip_stage *pstip)
pstip_fs = *orig_fs; /* copy to init */
pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ if (pstip_fs.tokens == NULL)
+ return FALSE;
memset(&transform, 0, sizeof(transform));
transform.wincoordInput = -1;
@@ -355,6 +357,8 @@ generate_pstip_fs(struct pstip_stage *pstip)
assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);
pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
+
+ return TRUE;
}
@@ -404,7 +408,7 @@ pstip_update_texture(struct pstip_stage *pstip)
/**
* Create the texture map we'll use for stippling.
*/
-static void
+static boolean
pstip_create_texture(struct pstip_stage *pstip)
{
struct pipe_context *pipe = pstip->pipe;
@@ -413,7 +417,7 @@ pstip_create_texture(struct pstip_stage *pstip)
memset(&texTemp, 0, sizeof(texTemp));
texTemp.target = PIPE_TEXTURE_2D;
- texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */
+ texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
texTemp.last_level = 0;
texTemp.width[0] = 32;
texTemp.height[0] = 32;
@@ -421,16 +425,17 @@ pstip_create_texture(struct pstip_stage *pstip)
texTemp.cpp = 1;
pstip->texture = screen->texture_create(screen, &texTemp);
- assert(pstip->texture->refcount == 1);
+ if (pstip->texture == NULL)
+ return FALSE;
+
+ return TRUE;
}
/**
- * Create the sampler CSO that'll be used for antialiasing.
- * By using a mipmapped texture, we don't have to generate a different
- * texture image for each line size.
+ * Create the sampler CSO that'll be used for stippling.
*/
-static void
+static boolean
pstip_create_sampler(struct pstip_stage *pstip)
{
struct pipe_sampler_state sampler;
@@ -448,24 +453,32 @@ pstip_create_sampler(struct pstip_stage *pstip)
sampler.max_lod = 0.0f;
pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
+ if (pstip->sampler_cso == NULL)
+ return FALSE;
+
+ return TRUE;
}
/**
- * When we're about to draw our first AA line in a batch, this function is
- * called to tell the driver to bind our modified fragment shader.
+ * When we're about to draw our first stipple polygon in a batch, this function
+ * is called to tell the driver to bind our modified fragment shader.
*/
-static void
+static boolean
bind_pstip_fragment_shader(struct pstip_stage *pstip)
{
- if (!pstip->fs->pstip_fs) {
- generate_pstip_fs(pstip);
- }
+ struct draw_context *draw = pstip->stage.draw;
+ if (!pstip->fs->pstip_fs &&
+ !generate_pstip_fs(pstip))
+ return FALSE;
+
+ draw->suspend_flushing = TRUE;
pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs);
+ draw->suspend_flushing = FALSE;
+ return TRUE;
}
-
static INLINE struct pstip_stage *
pstip_stage( struct draw_stage *stage )
{
@@ -474,38 +487,22 @@ pstip_stage( struct draw_stage *stage )
static void
-passthrough_point(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->point(stage->next, header);
-}
-
-
-static void
-passthrough_line(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->line(stage->next, header);
-}
-
-
-static void
-passthrough_tri(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->tri(stage->next, header);
-}
-
-
-
-static void
pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
{
struct pstip_stage *pstip = pstip_stage(stage);
struct pipe_context *pipe = pstip->pipe;
+ struct draw_context *draw = stage->draw;
uint num_samplers;
assert(stage->draw->rasterizer->poly_stipple_enable);
/* bind our fragprog */
- bind_pstip_fragment_shader(pstip);
+ if (!bind_pstip_fragment_shader(pstip)) {
+ stage->tri = draw_pipe_passthrough_tri;
+ stage->tri(stage, header);
+ return;
+ }
+
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -519,11 +516,13 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
assert(num_samplers <= PIPE_MAX_SAMPLERS);
+ draw->suspend_flushing = TRUE;
pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers);
pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures);
+ draw->suspend_flushing = FALSE;
- /* now really draw first line */
- stage->tri = passthrough_tri;
+ /* now really draw first triangle */
+ stage->tri = draw_pipe_passthrough_tri;
stage->tri(stage, header);
}
@@ -531,21 +530,21 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
static void
pstip_flush(struct draw_stage *stage, unsigned flags)
{
- /*struct draw_context *draw = stage->draw;*/
+ struct draw_context *draw = stage->draw;
struct pstip_stage *pstip = pstip_stage(stage);
struct pipe_context *pipe = pstip->pipe;
stage->tri = pstip_first_tri;
stage->next->flush( stage->next, flags );
- /* restore original frag shader */
+ /* restore original frag shader, texture, sampler state */
+ draw->suspend_flushing = TRUE;
pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs);
-
- /* XXX restore original texture, sampler state */
pstip->driver_bind_sampler_states(pipe, pstip->num_samplers,
pstip->state.samplers);
pstip->driver_set_sampler_textures(pipe, pstip->num_textures,
pstip->state.textures);
+ draw->suspend_flushing = FALSE;
}
@@ -560,6 +559,11 @@ static void
pstip_destroy(struct draw_stage *stage)
{
struct pstip_stage *pstip = pstip_stage(stage);
+ uint i;
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_texture_reference(&pstip->state.textures[i], NULL);
+ }
pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso);
@@ -579,8 +583,8 @@ draw_pstip_stage(struct draw_context *draw)
pstip->stage.draw = draw;
pstip->stage.next = NULL;
- pstip->stage.point = passthrough_point;
- pstip->stage.line = passthrough_line;
+ pstip->stage.point = draw_pipe_passthrough_point;
+ pstip->stage.line = draw_pipe_passthrough_line;
pstip->stage.tri = pstip_first_tri;
pstip->stage.flush = pstip_flush;
pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter;
@@ -690,8 +694,10 @@ pstip_set_polygon_stipple(struct pipe_context *pipe,
const struct pipe_poly_stipple *stipple)
{
struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
+
/* save current */
pstip->state.stipple = stipple;
+
/* pass-through */
pstip->driver_set_polygon_stipple(pstip->pipe, stipple);
@@ -699,13 +705,12 @@ pstip_set_polygon_stipple(struct pipe_context *pipe,
}
-
/**
- * Called by drivers that want to install this AA line prim stage
+ * Called by drivers that want to install this polygon stipple stage
* into the draw module's pipeline. This will not be used if the
- * hardware has native support for AA lines.
+ * hardware has native support for polygon stipple.
*/
-void
+boolean
draw_install_pstipple_stage(struct draw_context *draw,
struct pipe_context *pipe)
{
@@ -714,17 +719,22 @@ draw_install_pstipple_stage(struct draw_context *draw,
pipe->draw = (void *) draw;
/*
- * Create / install AA line drawing / prim stage
+ * Create / install pgon stipple drawing / prim stage
*/
pstip = draw_pstip_stage( draw );
- assert(pstip);
+ if (pstip == NULL)
+ goto fail;
+
draw->pipeline.pstipple = &pstip->stage;
pstip->pipe = pipe;
/* create special texture, sampler state */
- pstip_create_texture(pstip);
- pstip_create_sampler(pstip);
+ if (!pstip_create_texture(pstip))
+ goto fail;
+
+ if (!pstip_create_sampler(pstip))
+ goto fail;
/* save original driver functions */
pstip->driver_create_fs_state = pipe->create_fs_state;
@@ -743,4 +753,12 @@ draw_install_pstipple_stage(struct draw_context *draw,
pipe->bind_sampler_states = pstip_bind_sampler_states;
pipe->set_sampler_textures = pstip_set_sampler_textures;
pipe->set_polygon_stipple = pstip_set_polygon_stipple;
+
+ return TRUE;
+
+ fail:
+ if (pstip)
+ pstip->stage.destroy( &pstip->stage );
+
+ return FALSE;
}
diff --git a/src/gallium/auxiliary/draw/draw_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 506f33512c..3cbced362e 100644
--- a/src/gallium/auxiliary/draw/draw_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -39,7 +39,7 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
+#include "draw_pipe.h"
/** Subclass of draw_stage */
@@ -135,6 +135,10 @@ stipple_line(struct draw_stage *stage, struct prim_header *header)
float length = MAX2(dx, dy);
int i;
+ if (header->flags & DRAW_PIPE_RESET_STIPPLE)
+ stipple->counter = 0;
+
+
/* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table.
*/
for (i = 0; i < length; i++) {
@@ -195,18 +199,6 @@ stipple_flush(struct draw_stage *stage, unsigned flags)
}
-static void
-passthrough_point(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->point( stage->next, header );
-}
-
-
-static void
-passthrough_tri(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->tri(stage->next, header);
-}
static void
@@ -228,9 +220,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw )
stipple->stage.draw = draw;
stipple->stage.next = NULL;
- stipple->stage.point = passthrough_point;
+ stipple->stage.point = draw_pipe_passthrough_point;
stipple->stage.line = stipple_first_line;
- stipple->stage.tri = passthrough_tri;
+ stipple->stage.tri = draw_pipe_passthrough_tri;
stipple->stage.reset_stipple_counter = reset_stipple_counter;
stipple->stage.flush = stipple_flush;
stipple->stage.destroy = stipple_destroy;
diff --git a/src/gallium/auxiliary/draw/draw_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 3debaac282..50872fdbe9 100644
--- a/src/gallium/auxiliary/draw/draw_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -31,8 +31,8 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-
+#include "draw_vs.h"
+#include "draw_pipe.h"
struct twoside_stage {
struct draw_stage stage;
@@ -85,7 +85,8 @@ static void twoside_tri( struct draw_stage *stage,
struct prim_header tmp;
tmp.det = header->det;
- tmp.edgeflags = header->edgeflags;
+ tmp.flags = header->flags;
+ tmp.pad = header->pad;
/* copy back attribs to front attribs */
tmp.v[0] = copy_bfc(twoside, header->v[0], 0);
tmp.v[1] = copy_bfc(twoside, header->v[1], 1);
@@ -99,21 +100,6 @@ static void twoside_tri( struct draw_stage *stage,
}
-static void twoside_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- /* pass-through */
- stage->next->line( stage->next, header );
-}
-
-
-static void twoside_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- /* pass-through */
- stage->next->point( stage->next, header );
-}
-
static void twoside_first_tri( struct draw_stage *stage,
struct prim_header *header )
@@ -187,17 +173,26 @@ static void twoside_destroy( struct draw_stage *stage )
struct draw_stage *draw_twoside_stage( struct draw_context *draw )
{
struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage);
+ if (twoside == NULL)
+ goto fail;
- draw_alloc_temp_verts( &twoside->stage, 3 );
+ if (!draw_alloc_temp_verts( &twoside->stage, 3 ))
+ goto fail;
twoside->stage.draw = draw;
twoside->stage.next = NULL;
- twoside->stage.point = twoside_point;
- twoside->stage.line = twoside_line;
+ twoside->stage.point = draw_pipe_passthrough_point;
+ twoside->stage.line = draw_pipe_passthrough_line;
twoside->stage.tri = twoside_first_tri;
twoside->stage.flush = twoside_flush;
twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter;
twoside->stage.destroy = twoside_destroy;
return &twoside->stage;
+
+ fail:
+ if (twoside)
+ twoside->stage.destroy( &twoside->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index b07860cd9e..8f97fdedaa 100644
--- a/src/gallium/auxiliary/draw/draw_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -36,6 +36,7 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "draw_private.h"
+#include "draw_pipe.h"
struct unfilled_stage {
@@ -82,9 +83,9 @@ static void points( struct draw_stage *stage,
struct vertex_header *v1 = header->v[1];
struct vertex_header *v2 = header->v[2];
- if (header->edgeflags & 0x1) point( stage, v0 );
- if (header->edgeflags & 0x2) point( stage, v1 );
- if (header->edgeflags & 0x4) point( stage, v2 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 );
}
@@ -95,22 +96,22 @@ static void lines( struct draw_stage *stage,
struct vertex_header *v1 = header->v[1];
struct vertex_header *v2 = header->v[2];
-#if 0
- assert(((header->edgeflags & 0x1) >> 0) == header->v[0]->edgeflag);
- assert(((header->edgeflags & 0x2) >> 1) == header->v[1]->edgeflag);
- assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag);
-#endif
+ if (header->flags & DRAW_PIPE_RESET_STIPPLE)
+ stage->next->reset_stipple_counter( stage->next );
- if (header->edgeflags & 0x4) line( stage, v2, v0 );
- if (header->edgeflags & 0x1) line( stage, v0, v1 );
- if (header->edgeflags & 0x2) line( stage, v1, v2 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 );
+ if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 );
}
/* Unfilled tri:
*
* Note edgeflags in the vertex struct is not sufficient as we will
- * need to manipulate them when decomposing primitives???
+ * need to manipulate them when decomposing primitives.
+ *
+ * We currently keep the vertex edgeflag and primitive edgeflag mask
+ * separate until the last possible moment.
*/
static void unfilled_tri( struct draw_stage *stage,
struct prim_header *header )
@@ -147,19 +148,6 @@ static void unfilled_first_tri( struct draw_stage *stage,
}
-static void unfilled_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line( stage->next, header );
-}
-
-
-static void unfilled_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
static void unfilled_flush( struct draw_stage *stage,
unsigned flags )
@@ -189,18 +177,27 @@ static void unfilled_destroy( struct draw_stage *stage )
struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
{
struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage);
+ if (unfilled == NULL)
+ goto fail;
- draw_alloc_temp_verts( &unfilled->stage, 0 );
+ if (!draw_alloc_temp_verts( &unfilled->stage, 0 ))
+ goto fail;
unfilled->stage.draw = draw;
unfilled->stage.next = NULL;
unfilled->stage.tmp = NULL;
- unfilled->stage.point = unfilled_point;
- unfilled->stage.line = unfilled_line;
+ unfilled->stage.point = draw_pipe_passthrough_point;
+ unfilled->stage.line = draw_pipe_passthrough_line;
unfilled->stage.tri = unfilled_first_tri;
unfilled->stage.flush = unfilled_flush;
unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter;
unfilled->stage.destroy = unfilled_destroy;
return &unfilled->stage;
+
+ fail:
+ if (unfilled)
+ unfilled->stage.destroy( &unfilled->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c
new file mode 100644
index 0000000000..04438f4dd0
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_util.c
@@ -0,0 +1,137 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pipe.h"
+
+
+
+void
+draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->point(stage->next, header);
+}
+
+void
+draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->line(stage->next, header);
+}
+
+void
+draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->tri(stage->next, header);
+}
+
+
+
+
+
+/* This is only used for temporary verts.
+ */
+#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
+
+
+/**
+ * Allocate space for temporary post-transform vertices, such as for clipping.
+ */
+boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr )
+{
+ assert(!stage->tmp);
+
+ stage->tmp = NULL;
+ stage->nr_tmps = nr;
+
+ if (nr != 0)
+ {
+ unsigned i;
+ ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr );
+
+ if (store == NULL)
+ return FALSE;
+
+ stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr );
+ if (stage->tmp == NULL) {
+ FREE(store);
+ return FALSE;
+ }
+
+ for (i = 0; i < nr; i++)
+ stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE);
+ }
+
+ return TRUE;
+}
+
+
+void draw_free_temp_verts( struct draw_stage *stage )
+{
+ if (stage->tmp) {
+ FREE( stage->tmp[0] );
+ FREE( stage->tmp );
+ stage->tmp = NULL;
+ }
+}
+
+
+/* Reset vertex ids. This is basically a type of flush.
+ *
+ * Called only from draw_pipe_vbuf.c
+ */
+void draw_reset_vertex_ids(struct draw_context *draw)
+{
+ struct draw_stage *stage = draw->pipeline.first;
+
+ while (stage) {
+ unsigned i;
+
+ for (i = 0; i < stage->nr_tmps; i++)
+ stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID;
+
+ stage = stage->next;
+ }
+
+ if (draw->pipeline.verts)
+ {
+ unsigned i;
+ char *verts = draw->pipeline.verts;
+ unsigned stride = draw->pipeline.vertex_stride;
+
+ for (i = 0; i < draw->pipeline.vertex_count; i++) {
+ ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID;
+ verts += stride;
+ }
+ }
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index e163e078f0..6be1d369c3 100644
--- a/src/gallium/auxiliary/draw/draw_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -31,6 +31,8 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "draw_private.h"
+#include "draw_pipe.h"
+#include "draw_context.h"
static boolean points( unsigned prim )
{
@@ -56,7 +58,8 @@ static boolean triangles( unsigned prim )
* pipeline stages.
*/
boolean
-draw_need_pipeline(const struct draw_context *draw,
+draw_need_pipeline(const struct draw_context *draw,
+ const struct pipe_rasterizer_state *rasterizer,
unsigned int prim )
{
/* Don't have to worry about triangles turning into lines/points
@@ -66,30 +69,30 @@ draw_need_pipeline(const struct draw_context *draw,
if (lines(prim))
{
/* line stipple */
- if (draw->rasterizer->line_stipple_enable && draw->line_stipple)
+ if (rasterizer->line_stipple_enable && draw->pipeline.line_stipple)
return TRUE;
/* wide lines */
- if (draw->rasterizer->line_width > draw->wide_line_threshold)
+ if (rasterizer->line_width > draw->pipeline.wide_line_threshold)
return TRUE;
/* AA lines */
- if (draw->rasterizer->line_smooth && draw->pipeline.aaline)
+ if (rasterizer->line_smooth && draw->pipeline.aaline)
return TRUE;
}
if (points(prim))
{
/* large points */
- if (draw->rasterizer->point_size > draw->wide_point_threshold)
+ if (rasterizer->point_size > draw->pipeline.wide_point_threshold)
return TRUE;
/* AA points */
- if (draw->rasterizer->point_smooth && draw->pipeline.aapoint)
+ if (rasterizer->point_smooth && draw->pipeline.aapoint)
return TRUE;
/* point sprites */
- if (draw->rasterizer->point_sprite && draw->point_sprite)
+ if (rasterizer->point_sprite && draw->pipeline.point_sprite)
return TRUE;
}
@@ -97,20 +100,20 @@ draw_need_pipeline(const struct draw_context *draw,
if (triangles(prim))
{
/* polygon stipple */
- if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple)
+ if (rasterizer->poly_stipple_enable && draw->pipeline.pstipple)
return TRUE;
/* unfilled polygons */
- if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL)
+ if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL)
return TRUE;
/* polygon offset */
- if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw)
+ if (rasterizer->offset_cw || rasterizer->offset_ccw)
return TRUE;
/* two-side lighting */
- if (draw->rasterizer->light_twoside)
+ if (rasterizer->light_twoside)
return TRUE;
}
@@ -119,7 +122,7 @@ draw_need_pipeline(const struct draw_context *draw,
*
* Generally this isn't a reason to require the pipeline, though.
*
- if (draw->rasterizer->cull_mode)
+ if (rasterizer->cull_mode)
return TRUE;
*/
@@ -145,15 +148,15 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
stage->next = next;
/* drawing wide lines? */
- wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold
+ wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold
&& !draw->rasterizer->line_smooth);
/* drawing large points? */
- if (draw->rasterizer->point_sprite && draw->point_sprite)
+ if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)
wide_points = TRUE;
else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint)
wide_points = FALSE;
- else if (draw->rasterizer->point_size > draw->wide_point_threshold)
+ else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold)
wide_points = TRUE;
else
wide_points = FALSE;
@@ -186,7 +189,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
next = draw->pipeline.wide_point;
}
- if (draw->rasterizer->line_stipple_enable && draw->line_stipple) {
+ if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) {
draw->pipeline.stipple->next = next;
next = draw->pipeline.stipple;
precalc_flat = 1; /* only needed for lines really */
@@ -238,7 +241,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
/* Clip stage
*/
- if (!draw->rasterizer->bypass_clipping)
+ if (!draw->bypass_clipping)
{
draw->pipeline.clip->next = next;
next = draw->pipeline.clip;
@@ -298,6 +301,8 @@ static void validate_destroy( struct draw_stage *stage )
struct draw_stage *draw_validate_stage( struct draw_context *draw )
{
struct draw_stage *stage = CALLOC_STRUCT(draw_stage);
+ if (stage == NULL)
+ return NULL;
stage->draw = draw;
stage->next = NULL;
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index f83b441e93..86a7d1c730 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -40,7 +40,9 @@
#include "draw_vbuf.h"
#include "draw_private.h"
#include "draw_vertex.h"
-#include "draw_vf.h"
+#include "draw_pipe.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
/**
@@ -56,7 +58,7 @@ struct vbuf_stage {
/** Vertex size in bytes */
unsigned vertex_size;
- struct draw_vertex_fetch *vf;
+ struct translate *translate;
/* FIXME: we have no guarantee that 'unsigned' is 32bit */
@@ -71,8 +73,11 @@ struct vbuf_stage {
unsigned max_indices;
unsigned nr_indices;
- /** Pipe primitive */
- unsigned prim;
+ /* Cache point size somewhere it's address won't change:
+ */
+ float point_size;
+
+ struct translate_cache *cache;
};
@@ -113,68 +118,6 @@ check_space( struct vbuf_stage *vbuf, unsigned nr )
}
-#if 0
-static INLINE void
-dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
-{
- assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
- unsigned i, j, k;
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- debug_printf("EMIT_OMIT:");
- break;
- case EMIT_ALL:
- assert(i == 0);
- assert(j == 0);
- debug_printf("EMIT_ALL:\t");
- for(k = 0; k < vinfo->size*4; ++k)
- debug_printf("%02x ", *data++);
- break;
- case EMIT_1F:
- debug_printf("EMIT_1F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_1F_PSIZE:
- debug_printf("EMIT_1F_PSIZE:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_2F:
- debug_printf("EMIT_2F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_3F:
- debug_printf("EMIT_3F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- data += sizeof(float);
- break;
- case EMIT_4F:
- debug_printf("EMIT_4F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_4UB:
- debug_printf("EMIT_4UB:\t");
- debug_printf("%u ", *data++);
- debug_printf("%u ", *data++);
- debug_printf("%u ", *data++);
- debug_printf("%u ", *data++);
- break;
- default:
- assert(0);
- }
- debug_printf("\n");
- }
- debug_printf("\n");
-}
-#endif
/**
@@ -184,109 +127,25 @@ dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
* have a couple of slots at the beginning (1-dword header, 4-dword
* clip pos) that we ignore here. We only use the vertex->data[] fields.
*/
-static INLINE void
+static INLINE ushort
emit_vertex( struct vbuf_stage *vbuf,
struct vertex_header *vertex )
{
-#if 0
- debug_printf("emit vertex %d to %p\n",
- vbuf->nr_vertices, vbuf->vertex_ptr);
-#endif
-
- if(vertex->vertex_id != UNDEFINED_VERTEX_ID) {
- if(vertex->vertex_id < vbuf->nr_vertices)
- return;
- else
- debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n",
- vertex->vertex_id, vbuf->nr_vertices);
- return;
- }
-
- vertex->vertex_id = vbuf->nr_vertices++;
-
- if(!vbuf->vf) {
- const struct vertex_info *vinfo = vbuf->vinfo;
- uint i;
- uint count = 0; /* for debug/sanity */
+ if(vertex->vertex_id == UNDEFINED_VERTEX_ID) {
+ /* Hmm - vertices are emitted one at a time - better make sure
+ * set_buffer is efficient. Consider a special one-shot mode for
+ * translate.
+ */
+ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
+ vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr);
+
+ if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
- assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- uint j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- /* no-op */
- break;
- case EMIT_ALL:
- /* just copy the whole vertex as-is to the vbuf */
- assert(i == 0);
- assert(j == 0);
- memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4);
- vbuf->vertex_ptr += vinfo->size;
- count += vinfo->size;
- break;
- case EMIT_HEADER:
- memcpy(vbuf->vertex_ptr, vertex, sizeof(*vertex));
- *vbuf->vertex_ptr += sizeof(*vertex) / 4;
- count += sizeof(*vertex) / 4;
- break;
- case EMIT_1F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- count++;
- break;
- case EMIT_1F_PSIZE:
- *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size);
- count++;
- break;
- case EMIT_2F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- count += 2;
- break;
- case EMIT_3F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
- count += 3;
- break;
- case EMIT_4F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][3]);
- count += 4;
- break;
- case EMIT_4UB:
- *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ),
- float_to_ubyte( vertex->data[j][1] ),
- float_to_ubyte( vertex->data[j][0] ),
- float_to_ubyte( vertex->data[j][3] ));
- count += 1;
- break;
- default:
- assert(0);
- }
- }
- assert(count == vinfo->size);
-#if 0
- {
- static float data[256];
- draw_vf_emit_vertex(vbuf->vf, vertex, data);
- if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) {
- debug_printf("With VF:\n");
- dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data);
- debug_printf("Without VF:\n");
- dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size);
- assert(0);
- }
- }
-#endif
- }
- else {
- draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr);
-
vbuf->vertex_ptr += vbuf->vertex_size/4;
+ vertex->vertex_id = vbuf->nr_vertices++;
}
+
+ return vertex->vertex_id;
}
@@ -300,9 +159,7 @@ vbuf_tri( struct draw_stage *stage,
check_space( vbuf, 3 );
for (i = 0; i < 3; i++) {
- emit_vertex( vbuf, prim->v[i] );
-
- vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id;
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
}
}
@@ -317,9 +174,7 @@ vbuf_line( struct draw_stage *stage,
check_space( vbuf, 2 );
for (i = 0; i < 2; i++) {
- emit_vertex( vbuf, prim->v[i] );
-
- vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id;
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
}
}
@@ -332,43 +187,110 @@ vbuf_point( struct draw_stage *stage,
check_space( vbuf, 1 );
- emit_vertex( vbuf, prim->v[0] );
-
- vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[0]->vertex_id;
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] );
}
+
+
/**
* Set the prim type for subsequent vertices.
* This may result in a new vertex size. The existing vbuffer (if any)
* will be flushed if needed and a new one allocated.
*/
static void
-vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim )
+vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
{
- const struct vertex_info *vinfo;
- unsigned vertex_size;
-
- assert(newprim == PIPE_PRIM_POINTS ||
- newprim == PIPE_PRIM_LINES ||
- newprim == PIPE_PRIM_TRIANGLES);
+ struct translate_key hw_key;
+ unsigned dst_offset;
+ unsigned i;
- vbuf->prim = newprim;
- vbuf->render->set_primitive(vbuf->render, newprim);
+ vbuf->render->set_primitive(vbuf->render, prim);
- vinfo = vbuf->render->get_vertex_info(vbuf->render);
- vertex_size = vinfo->size * sizeof(float);
+ /* Must do this after set_primitive() above:
+ *
+ * XXX: need some state managment to track when this needs to be
+ * recalculated. The driver should tell us whether there was a
+ * state change.
+ */
+ vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
- if (vertex_size != vbuf->vertex_size)
+ if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) {
vbuf_flush_vertices(vbuf);
+ vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
+ }
- vbuf->vinfo = vinfo;
- vbuf->vertex_size = vertex_size;
- if(vbuf->vf)
- draw_vf_set_vertex_info(vbuf->vf,
- vbuf->vinfo,
- vbuf->stage.draw->rasterizer->point_size);
-
+ /* Translate from pipeline vertices to hw vertices.
+ */
+ dst_offset = 0;
+
+ for (i = 0; i < vbuf->vinfo->num_attribs; i++) {
+ unsigned emit_sz = 0;
+ unsigned src_buffer = 0;
+ unsigned output_format;
+ unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) );
+
+ switch (vbuf->vinfo->emit[i]) {
+ case EMIT_4F:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ src_buffer = 1;
+ src_offset = 0;
+ break;
+ case EMIT_4UB:
+ output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ emit_sz = 4 * sizeof(ubyte);
+ break;
+ default:
+ assert(0);
+ output_format = PIPE_FORMAT_NONE;
+ emit_sz = 0;
+ break;
+ }
+
+ hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ hw_key.element[i].input_buffer = src_buffer;
+ hw_key.element[i].input_offset = src_offset;
+ hw_key.element[i].output_format = output_format;
+ hw_key.element[i].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
+ }
+
+ hw_key.nr_elements = vbuf->vinfo->num_attribs;
+ hw_key.output_stride = vbuf->vinfo->size * 4;
+
+ /* Don't bother with caching at this stage:
+ */
+ if (!vbuf->translate ||
+ translate_key_compare(&vbuf->translate->key, &hw_key) != 0)
+ {
+ translate_key_sanitize(&hw_key);
+ vbuf->translate = translate_cache_find(vbuf->cache, &hw_key);
+
+ vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0);
+ }
+
+ vbuf->point_size = vbuf->stage.draw->rasterizer->point_size;
+
+ /* Allocate new buffer?
+ */
if (!vbuf->vertices)
vbuf_alloc_vertices(vbuf);
}
@@ -422,29 +344,9 @@ vbuf_flush_indices( struct vbuf_stage *vbuf )
assert((uint) (vbuf->vertex_ptr - vbuf->vertices) ==
vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned));
- switch(vbuf->prim) {
- case PIPE_PRIM_POINTS:
- break;
- case PIPE_PRIM_LINES:
- assert(vbuf->nr_indices % 2 == 0);
- break;
- case PIPE_PRIM_TRIANGLES:
- assert(vbuf->nr_indices % 3 == 0);
- break;
- default:
- assert(0);
- }
-
vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices);
vbuf->nr_indices = 0;
-
- /* don't need to reset point/line/tri functions */
-#if 0
- stage->point = vbuf_first_point;
- stage->line = vbuf_first_line;
- stage->tri = vbuf_first_tri;
-#endif
}
@@ -486,8 +388,8 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* Allocate a new vertex buffer */
vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render,
- (ushort) vbuf->vertex_size,
- (ushort) vbuf->max_vertices);
+ (ushort) vbuf->vertex_size,
+ (ushort) vbuf->max_vertices);
vbuf->vertex_ptr = vbuf->vertices;
}
@@ -525,12 +427,15 @@ static void vbuf_destroy( struct draw_stage *stage )
if(vbuf->indices)
align_free( vbuf->indices );
- if(vbuf->vf)
- draw_vf_destroy( vbuf->vf );
+ if(vbuf->translate)
+ vbuf->translate->release( vbuf->translate );
if (vbuf->render)
vbuf->render->destroy( vbuf->render );
+ if (vbuf->cache)
+ translate_cache_destroy(vbuf->cache);
+
FREE( stage );
}
@@ -542,9 +447,8 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
struct vbuf_render *render )
{
struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
-
- if(!vbuf)
- return NULL;
+ if (vbuf == NULL)
+ goto fail;
vbuf->stage.draw = draw;
vbuf->stage.point = vbuf_first_point;
@@ -555,21 +459,27 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
vbuf->stage.destroy = vbuf_destroy;
vbuf->render = render;
+ vbuf->max_indices = MAX2(render->max_indices, UNDEFINED_VERTEX_ID-1);
- assert(render->max_indices < UNDEFINED_VERTEX_ID);
- vbuf->max_indices = render->max_indices;
- vbuf->indices = (ushort *)
- align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 );
- if(!vbuf->indices)
- vbuf_destroy(&vbuf->stage);
+ vbuf->indices = (ushort *) align_malloc( vbuf->max_indices *
+ sizeof(vbuf->indices[0]),
+ 16 );
+ if (!vbuf->indices)
+ goto fail;
+
+ vbuf->cache = translate_cache_create();
+ if (!vbuf->cache)
+ goto fail;
+
vbuf->vertices = NULL;
vbuf->vertex_ptr = vbuf->vertices;
-
- vbuf->prim = ~0;
-
- if(!GETENV("GALLIUM_NOVF"))
- vbuf->vf = draw_vf_create();
return &vbuf->stage;
+
+ fail:
+ if (vbuf)
+ vbuf_destroy(&vbuf->stage);
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index 9a168ce8bd..878c9c7169 100644
--- a/src/gallium/auxiliary/draw/draw_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -32,6 +32,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "draw_private.h"
+#include "draw_pipe.h"
struct wideline_stage {
@@ -48,19 +49,6 @@ static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage )
}
-static void wideline_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-
-static void wideline_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->tri(stage->next, header);
-}
-
/**
* Draw a wide line by drawing a quad (two triangles).
@@ -179,9 +167,9 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw )
wide->stage.draw = draw;
wide->stage.next = NULL;
- wide->stage.point = wideline_point;
+ wide->stage.point = draw_pipe_passthrough_point;
wide->stage.line = wideline_line;
- wide->stage.tri = wideline_tri;
+ wide->stage.tri = draw_pipe_passthrough_tri;
wide->stage.flush = wideline_flush;
wide->stage.reset_stipple_counter = wideline_reset_stipple_counter;
wide->stage.destroy = wideline_destroy;
diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index 6fc7c9fcd7..ed08573382 100644
--- a/src/gallium/auxiliary/draw/draw_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -31,7 +31,8 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
struct widepoint_stage {
@@ -60,23 +61,6 @@ widepoint_stage( struct draw_stage *stage )
}
-static void passthrough_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-static void widepoint_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line(stage->next, header);
-}
-
-static void widepoint_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->tri(stage->next, header);
-}
/**
@@ -199,16 +183,16 @@ static void widepoint_first_point( struct draw_stage *stage,
wide->ybias = 0.0;
if (draw->rasterizer->gl_rasterization_rules) {
- wide->ybias = -0.125;
+ wide->xbias = 0.125;
}
/* XXX we won't know the real size if it's computed by the vertex shader! */
- if ((draw->rasterizer->point_size > draw->wide_point_threshold) ||
- (draw->rasterizer->point_sprite && draw->point_sprite)) {
+ if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) ||
+ (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)) {
stage->point = widepoint_point;
}
else {
- stage->point = passthrough_point;
+ stage->point = draw_pipe_passthrough_point;
}
if (draw->rasterizer->point_sprite) {
@@ -265,17 +249,26 @@ static void widepoint_destroy( struct draw_stage *stage )
struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
{
struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage);
+ if (wide == NULL)
+ goto fail;
- draw_alloc_temp_verts( &wide->stage, 4 );
+ if (!draw_alloc_temp_verts( &wide->stage, 4 ))
+ goto fail;
wide->stage.draw = draw;
wide->stage.next = NULL;
wide->stage.point = widepoint_first_point;
- wide->stage.line = widepoint_line;
- wide->stage.tri = widepoint_tri;
+ wide->stage.line = draw_pipe_passthrough_line;
+ wide->stage.tri = draw_pipe_passthrough_tri;
wide->stage.flush = widepoint_flush;
wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter;
wide->stage.destroy = widepoint_destroy;
return &wide->stage;
+
+ fail:
+ if (wide)
+ wide->stage.destroy( &wide->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c
deleted file mode 100644
index 51b6950334..0000000000
--- a/src/gallium/auxiliary/draw/draw_prim.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-
-#define RP_NONE 0
-#define RP_POINT 1
-#define RP_LINE 2
-#define RP_TRI 3
-
-
-static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
- RP_POINT,
- RP_LINE,
- RP_LINE,
- RP_LINE,
- RP_TRI,
- RP_TRI,
- RP_TRI,
- RP_TRI,
- RP_TRI,
- RP_TRI
-};
-
-
-static void draw_prim_queue_flush( struct draw_context *draw )
-{
- unsigned i;
-
- if (0)
- debug_printf("Flushing with %d prims, %d verts\n",
- draw->pq.queue_nr, draw->vs.queue_nr);
-
- assert (draw->pq.queue_nr != 0);
-
- /* NOTE: we cannot save draw->pipeline->first in a local var because
- * draw->pipeline->first is often changed by the first call to tri(),
- * line(), etc.
- */
- if (draw->rasterizer->line_stipple_enable) {
- switch (draw->reduced_prim) {
- case RP_TRI:
- for (i = 0; i < draw->pq.queue_nr; i++) {
- if (draw->pq.queue[i].reset_line_stipple)
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
-
- draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
- }
- break;
- case RP_LINE:
- for (i = 0; i < draw->pq.queue_nr; i++) {
- if (draw->pq.queue[i].reset_line_stipple)
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
-
- draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
- }
- break;
- case RP_POINT:
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- }
- }
- else {
- switch (draw->reduced_prim) {
- case RP_TRI:
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- case RP_LINE:
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- case RP_POINT:
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- }
- }
-
- draw->pq.queue_nr = 0;
- draw->vs.post_nr = 0;
- draw_vertex_cache_unreference( draw );
-}
-
-void draw_do_flush( struct draw_context *draw, unsigned flags )
-{
- if (0)
- debug_printf("Flushing with %d verts, %d prims\n",
- draw->vs.queue_nr,
- draw->pq.queue_nr );
-
- if (draw->flushing)
- return;
-
- draw->flushing = TRUE;
-
- if (flags >= DRAW_FLUSH_SHADER_QUEUE) {
- if (draw->vs.queue_nr) {
- (*draw->shader_queue_flush)(draw);
- }
-
- if (flags >= DRAW_FLUSH_PRIM_QUEUE) {
- if (draw->pq.queue_nr)
- draw_prim_queue_flush(draw);
-
- if (flags >= DRAW_FLUSH_VERTEX_CACHE) {
- draw_vertex_cache_invalidate(draw);
-
- if (flags >= DRAW_FLUSH_STATE_CHANGE) {
- draw->pipeline.first->flush( draw->pipeline.first, flags );
- draw->pipeline.first = draw->pipeline.validate;
- draw->reduced_prim = ~0;
- }
- }
- }
- }
-
- draw->flushing = FALSE;
-}
-
-
-
-/* Return a pointer to a freshly queued primitive header. Ensure that
- * there is room in the vertex cache for a maximum of "nr_verts" new
- * vertices. Flush primitive and/or vertex queues if necessary to
- * make space.
- */
-static struct prim_header *get_queued_prim( struct draw_context *draw,
- unsigned nr_verts )
-{
- if (!draw_vertex_cache_check_space( draw, nr_verts )) {
-// debug_printf("v");
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE );
- }
- else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) {
-// debug_printf("p");
- draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE );
- }
-
- assert(draw->pq.queue_nr < PRIM_QUEUE_LENGTH);
-
- return &draw->pq.queue[draw->pq.queue_nr++];
-}
-
-
-
-/**
- * Add a point to the primitive queue.
- * \param i0 index into user's vertex arrays
- */
-static void do_point( struct draw_context *draw,
- unsigned i0 )
-{
- struct prim_header *prim = get_queued_prim( draw, 1 );
-
- prim->reset_line_stipple = 0;
- prim->edgeflags = 1;
- prim->pad = 0;
- prim->v[0] = draw->vcache.get_vertex( draw, i0 );
-}
-
-
-/**
- * Add a line to the primitive queue.
- * \param i0 index into user's vertex arrays
- * \param i1 index into user's vertex arrays
- */
-static void do_line( struct draw_context *draw,
- boolean reset_stipple,
- unsigned i0,
- unsigned i1 )
-{
- struct prim_header *prim = get_queued_prim( draw, 2 );
-
- prim->reset_line_stipple = reset_stipple;
- prim->edgeflags = 1;
- prim->pad = 0;
- prim->v[0] = draw->vcache.get_vertex( draw, i0 );
- prim->v[1] = draw->vcache.get_vertex( draw, i1 );
-}
-
-/**
- * Add a triangle to the primitive queue.
- */
-static void do_triangle( struct draw_context *draw,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- struct prim_header *prim = get_queued_prim( draw, 3 );
-
-// _mesa_printf("tri %d %d %d\n", i0, i1, i2);
- prim->reset_line_stipple = 1;
- prim->edgeflags = ~0;
- prim->pad = 0;
- prim->v[0] = draw->vcache.get_vertex( draw, i0 );
- prim->v[1] = draw->vcache.get_vertex( draw, i1 );
- prim->v[2] = draw->vcache.get_vertex( draw, i2 );
-}
-
-static void do_ef_triangle( struct draw_context *draw,
- boolean reset_stipple,
- unsigned ef_mask,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- struct prim_header *prim = get_queued_prim( draw, 3 );
- struct vertex_header *v0 = draw->vcache.get_vertex( draw, i0 );
- struct vertex_header *v1 = draw->vcache.get_vertex( draw, i1 );
- struct vertex_header *v2 = draw->vcache.get_vertex( draw, i2 );
-
- prim->reset_line_stipple = reset_stipple;
-
- prim->edgeflags = ef_mask & ((v0->edgeflag << 0) |
- (v1->edgeflag << 1) |
- (v2->edgeflag << 2));
- prim->pad = 0;
- prim->v[0] = v0;
- prim->v[1] = v1;
- prim->v[2] = v2;
-}
-
-
-static void do_ef_quad( struct draw_context *draw,
- unsigned v0,
- unsigned v1,
- unsigned v2,
- unsigned v3 )
-{
- const unsigned omitEdge2 = ~(1 << 1);
- const unsigned omitEdge3 = ~(1 << 2);
- do_ef_triangle( draw, 1, omitEdge2, v0, v1, v3 );
- do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 );
-}
-
-static void do_quad( struct draw_context *draw,
- unsigned v0,
- unsigned v1,
- unsigned v2,
- unsigned v3 )
-{
- do_triangle( draw, v0, v1, v3 );
- do_triangle( draw, v1, v2, v3 );
-}
-
-
-/**
- * Main entrypoint to draw some number of points/lines/triangles
- */
-static void
-draw_prim( struct draw_context *draw,
- unsigned prim, unsigned start, unsigned count )
-{
- unsigned i;
- boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL);
- boolean flatfirst =
- (draw->rasterizer->flatshade & draw->rasterizer->flatshade_first) ? TRUE : FALSE;
-
-// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count );
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- do_point( draw,
- start + i );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- do_line( draw,
- TRUE,
- start + i + 0,
- start + i + 1);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- for (i = 1; i < count; i++) {
- do_line( draw,
- i == 1, /* XXX: only if vb not split */
- start + i - 1,
- start + i );
- }
-
- do_line( draw,
- 0,
- start + count - 1,
- start + 0 );
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- for (i = 1; i < count; i++) {
- do_line( draw,
- i == 1,
- start + i - 1,
- start + i );
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- if (unfilled) {
- for (i = 0; i+2 < count; i += 3) {
- do_ef_triangle( draw,
- 1,
- ~0,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- else {
- for (i = 0; i+2 < count; i += 3) {
- do_triangle( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- if (i & 1) {
- do_triangle( draw,
- start + i + 0,
- start + i + 2,
- start + i + 1 );
- }
- else {
- do_triangle( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- if (i & 1) {
- do_triangle( draw,
- start + i + 1,
- start + i + 0,
- start + i + 2 );
- }
- else {
- do_triangle( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- do_triangle( draw,
- start + i + 1,
- start + i + 2,
- start + 0 );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- do_triangle( draw,
- start + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- if (unfilled) {
- for (i = 0; i+3 < count; i += 4) {
- do_ef_quad( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2,
- start + i + 3);
- }
- }
- else {
- for (i = 0; i+3 < count; i += 4) {
- do_quad( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2,
- start + i + 3);
- }
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- if (unfilled) {
- for (i = 0; i+3 < count; i += 2) {
- do_ef_quad( draw,
- start + i + 2,
- start + i + 0,
- start + i + 1,
- start + i + 3);
- }
- }
- else {
- for (i = 0; i+3 < count; i += 2) {
- do_quad( draw,
- start + i + 2,
- start + i + 0,
- start + i + 1,
- start + i + 3);
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- if (unfilled) {
- unsigned ef_mask = (1<<2) | (1<<0);
-
- for (i = 0; i+2 < count; i++) {
-
- if (i + 3 >= count)
- ef_mask |= (1<<1);
-
- do_ef_triangle( draw,
- i == 0,
- ef_mask,
- start + i + 1,
- start + i + 2,
- start + 0);
-
- ef_mask &= ~(1<<2);
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- do_triangle( draw,
- start + i + 1,
- start + i + 2,
- start + 0);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-}
-
-
-
-
-/**
- * Draw vertex arrays
- * This is the main entrypoint into the drawing module.
- * \param prim one of PIPE_PRIM_x
- * \param start index of first vertex to draw
- * \param count number of vertices to draw
- */
-void
-draw_arrays(struct draw_context *draw, unsigned prim,
- unsigned start, unsigned count)
-{
- if (reduced_prim[prim] != draw->reduced_prim) {
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->reduced_prim = reduced_prim[prim];
- }
-
- /* drawing done here: */
- if (!draw_pt_arrays(draw, prim, start, count)) {
- /* we have to run the whole pipeline */
- draw_prim(draw, prim, start, count);
- }
-}
-
-
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 4d056f6dba..cee58bbf73 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -44,7 +44,6 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
-#include "rtasm/rtasm_x86sse.h"
#include "tgsi/exec/tgsi_exec.h"
#include "tgsi/util/tgsi_scan.h"
@@ -52,9 +51,11 @@
struct pipe_context;
struct gallivm_prog;
struct gallivm_cpu_engine;
+struct draw_vertex_shader;
+struct draw_context;
+struct draw_stage;
+struct vbuf_render;
-struct draw_pt_middle_end;
-struct draw_pt_front_end;
/**
* Basic vertex info.
@@ -68,115 +69,14 @@ struct vertex_header {
float clip[4];
- float data[][4]; /* Note variable size */
+ /* This will probably become float (*data)[4] soon:
+ */
+ float data[][4];
};
/* NOTE: It should match vertex_id size above */
#define UNDEFINED_VERTEX_ID 0xffff
-/* XXX This is too large */
-#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
-
-
-
-/**
- * Basic info for a point/line/triangle primitive.
- */
-struct prim_header {
- float det; /**< front/back face determinant */
- unsigned reset_line_stipple:1;
- unsigned edgeflags:3;
- unsigned pad:28;
- struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */
-};
-
-
-
-struct draw_context;
-
-/**
- * Base class for all primitive drawing stages.
- */
-struct draw_stage
-{
- struct draw_context *draw; /**< parent context */
-
- struct draw_stage *next; /**< next stage in pipeline */
-
- struct vertex_header **tmp; /**< temp vert storage, such as for clipping */
- unsigned nr_tmps;
-
- void (*point)( struct draw_stage *,
- struct prim_header * );
-
- void (*line)( struct draw_stage *,
- struct prim_header * );
-
- void (*tri)( struct draw_stage *,
- struct prim_header * );
-
- void (*flush)( struct draw_stage *,
- unsigned flags );
-
- void (*reset_stipple_counter)( struct draw_stage * );
-
- void (*destroy)( struct draw_stage * );
-};
-
-
-#define PRIM_QUEUE_LENGTH 32
-#define VCACHE_SIZE 32
-#define VCACHE_OVERFLOW 4
-#define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */
-
-/**
- * Private version of the compiled vertex_shader
- */
-struct draw_vertex_shader {
-
- /* This member will disappear shortly:
- */
- struct pipe_shader_state state;
-
- struct tgsi_shader_info info;
-
- void (*prepare)( struct draw_vertex_shader *shader,
- struct draw_context *draw );
-
- /* Run the shader - this interface will get cleaned up in the
- * future:
- */
- void (*run)( struct draw_vertex_shader *shader,
- struct draw_context *draw,
- const unsigned *elts,
- unsigned count,
- struct vertex_header *vOut[] );
-
-
- void (*delete)( struct draw_vertex_shader * );
-};
-
-
-/* Internal function for vertex fetch.
- */
-typedef void (*fetch_func)(const void *ptr, float *attrib);
-
-fetch_func draw_get_fetch_func( enum pipe_format format );
-
-
-
-typedef void (*full_fetch_func)( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count );
-
-typedef void (*pt_fetch_func)( struct draw_context *draw,
- float *out,
- unsigned start,
- unsigned count );
-
-
-struct vbuf_render;
/**
* Private context for the drawing module.
@@ -203,6 +103,17 @@ struct draw_context
struct draw_stage *wide_line;
struct draw_stage *wide_point;
struct draw_stage *rasterize;
+
+ float wide_point_threshold; /**< convert pnts to tris if larger than this */
+ float wide_line_threshold; /**< convert lines to tris if wider than this */
+ boolean line_stipple; /**< do line stipple? */
+ boolean point_sprite; /**< convert points to quads for sprites? */
+
+ /* Temporary storage while the pipeline is being run:
+ */
+ char *verts;
+ unsigned vertex_stride;
+ unsigned vertex_count;
} pipeline;
@@ -211,70 +122,64 @@ struct draw_context
/* Support prototype passthrough path:
*/
struct {
- unsigned prim; /* XXX: to be removed */
- unsigned hw_vertex_size; /* XXX: to be removed */
-
struct {
struct draw_pt_middle_end *fetch_emit;
- struct draw_pt_middle_end *fetch_pipeline;
- struct draw_pt_middle_end *fetch_shade_emit;
- struct draw_pt_middle_end *fetch_shade_cliptest_pipeline_or_emit;
+ struct draw_pt_middle_end *general;
} middle;
struct {
- struct draw_pt_front_end *noop;
- struct draw_pt_front_end *split_arrays;
struct draw_pt_front_end *vcache;
+ struct draw_pt_front_end *varray;
} front;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_buffers;
+
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_elements;
+
+ /* user-space vertex data, buffers */
struct {
- char *verts;
- unsigned vertex_stride;
- unsigned vertex_count;
- } pipeline;
+ const unsigned *edgeflag;
+
+ /** vertex element/index buffer (ex: glDrawElements) */
+ const void *elts;
+ /** bytes per index (0, 1, 2 or 4) */
+ unsigned eltSize;
+
+ /** vertex arrays */
+ const void *vbuffer[PIPE_MAX_ATTRIBS];
+
+ /** constant buffer (for vertex shader) */
+ const void *constants;
+ } user;
} pt;
- boolean flushing;
+ struct {
+ boolean bypass_clipping;
+ } driver;
+
+ boolean flushing; /**< debugging/sanity */
+ boolean suspend_flushing; /**< internally set */
+ boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */
/* pipe state that we need: */
const struct pipe_rasterizer_state *rasterizer;
struct pipe_viewport_state viewport;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+
struct draw_vertex_shader *vertex_shader;
boolean identity_viewport;
uint num_vs_outputs; /**< convenience, from vertex_shader */
- /* user-space vertex data, buffers */
- struct {
- const unsigned *edgeflag;
-
- /** vertex element/index buffer (ex: glDrawElements) */
- const void *elts;
- /** bytes per index (0, 1, 2 or 4) */
- unsigned eltSize;
-
- /** vertex arrays */
- const void *vbuffer[PIPE_MAX_ATTRIBS];
-
- /** constant buffer (for vertex shader) */
- const void *constants;
- } user;
/* Clip derived state:
*/
float plane[12][4];
unsigned nr_planes;
- float wide_point_threshold; /**< convert pnts to tris if larger than this */
- float wide_line_threshold; /**< convert lines to tris if wider than this */
- boolean line_stipple; /**< do line stipple? */
- boolean point_sprite; /**< convert points to quads for sprites? */
- boolean use_sse;
-
/* If a prim stage introduces new vertex attributes, they'll be stored here
*/
struct {
@@ -288,61 +193,6 @@ struct draw_context
/** TGSI program interpreter runtime state */
struct tgsi_exec_machine machine;
- /* Vertex fetch internal state
- */
- struct {
- const ubyte *src_ptr[PIPE_MAX_ATTRIBS];
- unsigned pitch[PIPE_MAX_ATTRIBS];
- fetch_func fetch[PIPE_MAX_ATTRIBS];
- unsigned nr_attrs;
- full_fetch_func fetch_func;
- pt_fetch_func pt_fetch;
- } vertex_fetch;
-
- /* Post-tnl vertex cache:
- */
- struct {
- unsigned referenced; /**< bitfield */
-
- struct {
- unsigned in; /* client array element */
- unsigned out; /* index in vs queue/array */
- } idx[VCACHE_SIZE + VCACHE_OVERFLOW];
-
- unsigned overflow;
-
- /** To find space in the vertex cache: */
- struct vertex_header *(*get_vertex)( struct draw_context *draw,
- unsigned i );
- } vcache;
-
- /* Vertex shader queue:
- */
- struct {
- struct {
- unsigned elt; /**< index into the user's vertex arrays */
- struct vertex_header *vertex;
- } queue[VS_QUEUE_LENGTH];
- unsigned queue_nr;
- unsigned post_nr;
- } vs;
-
- /**
- * Run the vertex shader on all vertices in the vertex queue.
- */
- void (*shader_queue_flush)(struct draw_context *draw);
-
- /* Prim pipeline queue:
- */
- struct {
- /* Need to queue up primitives until their vertices have been
- * transformed by a vs queue flush.
- */
- struct prim_header queue[PRIM_QUEUE_LENGTH];
- unsigned queue_nr;
- } pq;
-
-
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
*/
struct gallivm_cpu_engine *engine;
@@ -351,101 +201,70 @@ struct draw_context
-extern struct draw_stage *draw_unfilled_stage( struct draw_context *context );
-extern struct draw_stage *draw_twoside_stage( struct draw_context *context );
-extern struct draw_stage *draw_offset_stage( struct draw_context *context );
-extern struct draw_stage *draw_clip_stage( struct draw_context *context );
-extern struct draw_stage *draw_flatshade_stage( struct draw_context *context );
-extern struct draw_stage *draw_cull_stage( struct draw_context *context );
-extern struct draw_stage *draw_stipple_stage( struct draw_context *context );
-extern struct draw_stage *draw_wide_line_stage( struct draw_context *context );
-extern struct draw_stage *draw_wide_point_stage( struct draw_context *context );
-extern struct draw_stage *draw_validate_stage( struct draw_context *context );
-extern void draw_free_temp_verts( struct draw_stage *stage );
-extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr );
-extern void draw_reset_vertex_ids( struct draw_context *draw );
+/*******************************************************************************
+ * Vertex processing (was passthrough) code:
+ */
+boolean draw_pt_init( struct draw_context *draw );
+void draw_pt_destroy( struct draw_context *draw );
+void draw_pt_reset_vertex_ids( struct draw_context *draw );
-extern int draw_vertex_cache_check_space( struct draw_context *draw,
- unsigned nr_verts );
+/*******************************************************************************
+ * Primitive processing (pipeline) code:
+ */
-extern void draw_vertex_cache_invalidate( struct draw_context *draw );
-extern void draw_vertex_cache_unreference( struct draw_context *draw );
-extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw );
+boolean draw_pipeline_init( struct draw_context *draw );
+void draw_pipeline_destroy( struct draw_context *draw );
-extern void draw_vertex_shader_queue_flush( struct draw_context *draw );
-extern void draw_update_vertex_fetch( struct draw_context *draw );
-extern boolean draw_need_pipeline(const struct draw_context *draw,
- unsigned prim );
-/* Passthrough mode (second attempt):
+/* We use the top few bits in the elts[] parameter to convey a little
+ * API information. This limits the number of vertices we can address
+ * to only 4096 -- if that becomes a problem, we can switch to 32-bit
+ * draw indices.
+ *
+ * These flags expected at first vertex of lines & triangles when
+ * unfilled and/or line stipple modes are operational.
*/
-boolean draw_pt_init( struct draw_context *draw );
-void draw_pt_destroy( struct draw_context *draw );
-boolean draw_pt_arrays( struct draw_context *draw,
+#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12)
+#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12)
+#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12)
+#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12)
+#define DRAW_PIPE_RESET_STIPPLE (0x8<<12)
+#define DRAW_PIPE_FLAG_MASK (0xf<<12)
+
+void draw_pipeline_run( struct draw_context *draw,
unsigned prim,
- unsigned start,
+ struct vertex_header *vertices,
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
unsigned count );
-void draw_pt_reset_vertex_ids( struct draw_context *draw );
-void draw_pt_run_pipeline( struct draw_context *draw,
- unsigned prim,
- char *verts,
- unsigned vertex_stride,
- unsigned vertex_count,
- const ushort *elts,
- unsigned count );
-
-
-#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */
-#define DRAW_FLUSH_PRIM_QUEUE 0x2
-#define DRAW_FLUSH_VERTEX_CACHE 0x4
+
+
+void draw_pipeline_flush( struct draw_context *draw,
+ unsigned flags );
+
+
+
+/*******************************************************************************
+ * Flushing
+ */
+
#define DRAW_FLUSH_STATE_CHANGE 0x8
#define DRAW_FLUSH_BACKEND 0x10
void draw_do_flush( struct draw_context *draw, unsigned flags );
-boolean draw_get_edgeflag( struct draw_context *draw,
- unsigned idx );
-
-/**
- * Get a writeable copy of a vertex.
- * \param stage drawing stage info
- * \param vert the vertex to copy (source)
- * \param idx index into stage's tmp[] array to put the copy (dest)
- * \return pointer to the copied vertex
- */
-static INLINE struct vertex_header *
-dup_vert( struct draw_stage *stage,
- const struct vertex_header *vert,
- unsigned idx )
-{
- struct vertex_header *tmp = stage->tmp[idx];
- const uint vsize = sizeof(struct vertex_header)
- + stage->draw->num_vs_outputs * 4 * sizeof(float);
- memcpy(tmp, vert, vsize);
- tmp->vertex_id = UNDEFINED_VERTEX_ID;
- return tmp;
-}
-
-static INLINE float
-dot4(const float *a, const float *b)
-{
- float result = (a[0]*b[0] +
- a[1]*b[1] +
- a[2]*b[2] +
- a[3]*b[3]);
- return result;
-}
#endif /* DRAW_PRIVATE_H */
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index f59fb86f78..c9c5d18313 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -36,153 +36,63 @@
#include "draw/draw_pt.h"
-/* XXX: Shouldn't those two functions below use the '>' operator???
- */
-
-static boolean too_many_verts( struct draw_context *draw,
- unsigned verts )
-{
- return verts < 1024;
-}
-
-static boolean too_many_elts( struct draw_context *draw,
- unsigned elts )
-{
- return elts < (16 * 1024);
-}
-boolean
+/* Overall we split things into:
+ * - frontend -- prepare fetch_elts, draw_elts - eg vcache
+ * - middle -- fetch, shade, cliptest, viewport
+ * - pipeline -- the prim pipeline: clipping, wide lines, etc
+ * - backend -- the vbuf_render provided by the driver.
+ */
+static boolean
draw_pt_arrays(struct draw_context *draw,
unsigned prim,
unsigned start,
unsigned count)
{
- const boolean pipeline = draw_need_pipeline(draw, prim);
- const boolean cliptest = !draw->rasterizer->bypass_clipping;
- const boolean shading = !draw->rasterizer->bypass_vs;
struct draw_pt_front_end *frontend = NULL;
struct draw_pt_middle_end *middle = NULL;
+ unsigned opt = 0;
-
- /* Overall we do:
- * - frontend -- prepare fetch_elts, draw_elts - eg vcache
- * - middle -- fetch, shade, cliptest, viewport
- * - pipeline -- the prim pipeline: clipping, wide lines, etc
- * - backend -- the vbuf_render provided by the driver.
- */
-
-
- if (!cliptest && !pipeline && !shading) {
- /* This is the 'passthrough' path:
- */
- /* Fetch user verts, emit hw verts:
- */
- middle = draw->pt.middle.fetch_emit;
- }
- else if (!cliptest && !shading) {
- /* This is the 'passthrough' path targetting the pipeline backend.
- */
- /* Fetch user verts, emit pipeline verts, run pipeline:
- */
- middle = draw->pt.middle.fetch_pipeline;
+ if (!draw->render) {
+ opt |= PT_PIPELINE;
}
-#if 0
- else if (!cliptest && !pipeline) {
- /* Fetch user verts, run vertex shader, emit hw verts:
- */
- middle = draw->pt.middle.fetch_shade_emit;
- }
- else if (!pipeline) {
- /* Even though !pipeline, we have to run it to get clipping. We
- * do know that the pipeline is just the clipping operation, but
- * that probably doesn't help much.
- *
- * This is going to be the most important path for a lot of
- * swtnl cards.
- */
- /* Fetch user verts,
- * run vertex shader,
- * cliptest and viewport trasform
- * if no clipped vertices,
- * emit hw verts
- * else
- * run pipline
- */
- middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit;
- }
- else if (!cliptest) {
- /* Fetch user verts, run vertex shader, run pipeline:
- */
- middle = draw->pt.middle.fetch_shade_pipeline;
+
+ if (draw_need_pipeline(draw,
+ draw->rasterizer,
+ prim)) {
+ opt |= PT_PIPELINE;
}
- else {
- /* This is what we're currently always doing:
- */
- /* Fetch user verts, run vertex shader, cliptest, run pipeline:
- */
- middle = draw->pt.middle.fetch_shade_cliptest_pipeline;
+
+ if (!draw->bypass_clipping) {
+ opt |= PT_CLIPTEST;
}
-#else
- else {
- return FALSE;
+
+ if (!draw->rasterizer->bypass_vs) {
+ opt |= PT_SHADE;
}
-#endif
+ if (opt)
+ middle = draw->pt.middle.general;
+ else
+ middle = draw->pt.middle.fetch_emit;
- /* If !pipeline, need to make sure we respect the driver's limited
- * capabilites to receive blocks of vertex data and elements.
- */
-#if 0
- if (!pipeline) {
- unsigned vertex_mode = passthrough;
- unsigned nr_verts = count_vertices( draw, start, count );
- unsigned hw_prim = prim;
-
- if (is_elts(draw)) {
- frontend = draw->pt.front.vcache;
- hw_prim = reduced_prim(prim);
- }
-
- if (too_many_verts(nr_verts)) {
- /* if (is_verts(draw) && can_split(prim)) {
- draw = draw_arrays_split;
- }
- else */ {
- frontend = draw->pt.front.vcache;
- hw_prim = reduced_prim(prim);
- }
- }
-
- if (too_many_elts(count)) {
-
- /* if (is_elts(draw) && can_split(prim)) {
- draw = draw_elts_split;
- }
- else */ {
- frontend = draw->pt.front.vcache;
- hw_prim = reduced_prim(prim);
- }
- }
-
- if (!good_prim(hw_prim)) {
- draw = draw->pt.front.vcache;
- }
- }
-#else
- frontend = draw->pt.front.vcache;
-#endif
- /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ /* Pick the right frontend
*/
- draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ if (draw->pt.user.elts ||
+ count >= 256) {
+ frontend = draw->pt.front.vcache;
+ } else {
+ frontend = draw->pt.front.varray;
+ }
- frontend->prepare( frontend, prim, middle );
+ frontend->prepare( frontend, prim, middle, opt );
- frontend->run( frontend,
- draw_pt_elt_func( draw ),
- draw_pt_elt_ptr( draw, start ),
- count );
+ frontend->run(frontend,
+ draw_pt_elt_func(draw),
+ draw_pt_elt_ptr(draw, start),
+ count);
frontend->finish( frontend );
@@ -192,16 +102,20 @@ draw_pt_arrays(struct draw_context *draw,
boolean draw_pt_init( struct draw_context *draw )
{
- draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
- if (!draw->pt.middle.fetch_emit)
+ draw->pt.front.vcache = draw_pt_vcache( draw );
+ if (!draw->pt.front.vcache)
return FALSE;
- draw->pt.middle.fetch_pipeline = draw_pt_fetch_pipeline( draw );
- if (!draw->pt.middle.fetch_pipeline)
+ draw->pt.front.varray = draw_pt_varray(draw);
+ if (!draw->pt.front.varray)
return FALSE;
- draw->pt.front.vcache = draw_pt_vcache( draw );
- if (!draw->pt.front.vcache)
+ draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
+ if (!draw->pt.middle.fetch_emit)
+ return FALSE;
+
+ draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
+ if (!draw->pt.middle.general)
return FALSE;
return TRUE;
@@ -210,18 +124,68 @@ boolean draw_pt_init( struct draw_context *draw )
void draw_pt_destroy( struct draw_context *draw )
{
+ if (draw->pt.middle.general) {
+ draw->pt.middle.general->destroy( draw->pt.middle.general );
+ draw->pt.middle.general = NULL;
+ }
+
if (draw->pt.middle.fetch_emit) {
draw->pt.middle.fetch_emit->destroy( draw->pt.middle.fetch_emit );
draw->pt.middle.fetch_emit = NULL;
}
- if (draw->pt.middle.fetch_pipeline) {
- draw->pt.middle.fetch_pipeline->destroy( draw->pt.middle.fetch_pipeline );
- draw->pt.middle.fetch_pipeline = NULL;
- }
-
if (draw->pt.front.vcache) {
draw->pt.front.vcache->destroy( draw->pt.front.vcache );
draw->pt.front.vcache = NULL;
}
+
+ if (draw->pt.front.varray) {
+ draw->pt.front.varray->destroy( draw->pt.front.varray );
+ draw->pt.front.varray = NULL;
+ }
+}
+
+
+
+static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES
+};
+
+
+/**
+ * Draw vertex arrays
+ * This is the main entrypoint into the drawing module.
+ * \param prim one of PIPE_PRIM_x
+ * \param start index of first vertex to draw
+ * \param count number of vertices to draw
+ */
+void
+draw_arrays(struct draw_context *draw, unsigned prim,
+ unsigned start, unsigned count)
+{
+ if (reduced_prim[prim] != draw->reduced_prim) {
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->reduced_prim = reduced_prim[prim];
+ }
+
+ /* drawing done here: */
+ draw_pt_arrays(draw, prim, start, count);
+}
+
+boolean draw_pt_get_edgeflag( struct draw_context *draw,
+ unsigned idx )
+{
+ if (draw->pt.user.edgeflag)
+ return (draw->pt.user.edgeflag[idx/32] & (1 << (idx%32))) != 0;
+ else
+ return 1;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 590823fd33..2dec376cee 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -40,14 +40,11 @@ typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );
struct draw_pt_middle_end;
struct draw_context;
-/* We use the top couple of bits in the vertex fetch index to convey a
- * little API information. This limits the number of vertices we can
- * address to only 1 billion -- if that becomes a problem, these could
- * be moved out & passed separately.
- */
-#define DRAW_PT_EDGEFLAG (1<<30)
-#define DRAW_PT_RESET_STIPPLE (1<<31)
-#define DRAW_PT_FLAG_MASK (3<<30)
+
+#define PT_SHADE 0x1
+#define PT_CLIPTEST 0x2
+#define PT_PIPELINE 0x4
+#define PT_MAX_MIDDLE 0x8
/* The "front end" - prepare sets of fetch, draw elements for the
@@ -64,7 +61,8 @@ struct draw_context;
struct draw_pt_front_end {
void (*prepare)( struct draw_pt_front_end *,
unsigned prim,
- struct draw_pt_middle_end * );
+ struct draw_pt_middle_end *,
+ unsigned opt );
void (*run)( struct draw_pt_front_end *,
pt_elt_func elt_func,
@@ -82,15 +80,11 @@ struct draw_pt_front_end {
* Currently two versions of this:
* - fetch, vertex shade, cliptest, prim-pipeline
* - fetch, emit (ie passthrough)
- * Later:
- * - fetch, vertex shade, cliptest, maybe-pipeline, maybe-emit
- * - fetch, vertex shade, emit
- *
- * Currenly only using the passthrough version.
*/
struct draw_pt_middle_end {
void (*prepare)( struct draw_pt_middle_end *,
- unsigned prim );
+ unsigned prim,
+ unsigned opt );
void (*run)( struct draw_pt_middle_end *,
const unsigned *fetch_elts,
@@ -104,12 +98,9 @@ struct draw_pt_middle_end {
/* The "back end" - supplied by the driver, defined in draw_vbuf.h.
- *
- * Not sure whether to wrap the prim pipeline up as an alternate
- * backend. Would be a win for everything except pure passthrough
- * mode...
*/
struct vbuf_render;
+struct vertex_header;
/* Helper functions.
@@ -118,12 +109,89 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw );
const void *draw_pt_elt_ptr( struct draw_context *draw,
unsigned start );
-/* Implementations:
+/* Frontends:
+ *
+ * Currently only the general-purpose vcache implementation, could add
+ * a special case for tiny vertex buffers.
*/
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
+struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+
+/* Middle-ends:
+ *
+ * Currently one general-purpose case which can do all possibilities,
+ * at the slight expense of creating a vertex_header in some cases
+ * unecessarily.
+ *
+ * The special case fetch_emit code avoids pipeline vertices
+ * altogether and builds hardware vertices directly from API
+ * vertex_elements.
+ */
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
-struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw );
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
+
+
+/* More helpers:
+ */
+boolean draw_pt_get_edgeflag( struct draw_context *draw,
+ unsigned idx );
+
+
+/*******************************************************************************
+ * HW vertex emit:
+ */
+struct pt_emit;
+
+void draw_pt_emit_prepare( struct pt_emit *emit,
+ unsigned prim );
+
+void draw_pt_emit( struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count );
+
+void draw_pt_emit_destroy( struct pt_emit *emit );
+
+struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
+
+
+/*******************************************************************************
+ * API vertex fetch:
+ */
+
+struct pt_fetch;
+void draw_pt_fetch_prepare( struct pt_fetch *fetch,
+ unsigned vertex_size );
+
+void draw_pt_fetch_run( struct pt_fetch *fetch,
+ const unsigned *elts,
+ unsigned count,
+ char *verts );
+
+void draw_pt_fetch_destroy( struct pt_fetch *fetch );
+
+struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
+
+/*******************************************************************************
+ * Post-VS: cliptest, rhw, viewport
+ */
+struct pt_post_vs;
+
+boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
+ struct vertex_header *pipeline_verts,
+ unsigned stride,
+ unsigned count );
+
+void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
+ boolean bypass_clipping,
+ boolean identity_viewport,
+ boolean opengl );
+
+struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
+void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
index d49770e7b2..2094c081ed 100644
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ b/src/gallium/auxiliary/draw/draw_pt_elts.c
@@ -59,7 +59,7 @@ static unsigned elt_vert( const void *elts, unsigned idx )
pt_elt_func draw_pt_elt_func( struct draw_context *draw )
{
- switch (draw->user.eltSize) {
+ switch (draw->pt.user.eltSize) {
case 0: return elt_vert;
case 1: return elt_ubyte;
case 2: return elt_ushort;
@@ -71,9 +71,9 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw )
const void *draw_pt_elt_ptr( struct draw_context *draw,
unsigned start )
{
- const char *elts = draw->user.elts;
+ const char *elts = draw->pt.user.elts;
- switch (draw->user.eltSize) {
+ switch (draw->pt.user.eltSize) {
case 0:
return (const void *)(((const ubyte *)NULL) + start);
case 1:
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
new file mode 100644
index 0000000000..ce3a153f64
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -0,0 +1,203 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+struct pt_emit {
+ struct draw_context *draw;
+
+ struct translate *translate;
+
+ struct translate_cache *cache;
+};
+
+void draw_pt_emit_prepare( struct pt_emit *emit,
+ unsigned prim )
+{
+ struct draw_context *draw = emit->draw;
+ const struct vertex_info *vinfo;
+ unsigned dst_offset;
+ struct translate_key hw_key;
+ unsigned i;
+ boolean ok;
+
+ ok = draw->render->set_primitive(draw->render, prim);
+ if (!ok) {
+ assert(0);
+ return;
+ }
+
+ /* Must do this after set_primitive() above:
+ */
+ vinfo = draw->render->get_vertex_info(draw->render);
+
+
+ /* Translate from pipeline vertices to hw vertices.
+ */
+ dst_offset = 0;
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ unsigned emit_sz = 0;
+ unsigned src_buffer = 0;
+ unsigned output_format;
+ unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) );
+
+
+
+ switch (vinfo->emit[i]) {
+ case EMIT_4F:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ src_buffer = 1;
+ src_offset = 0;
+ break;
+ case EMIT_4UB:
+ output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ emit_sz = 4 * sizeof(ubyte);
+ default:
+ assert(0);
+ output_format = PIPE_FORMAT_NONE;
+ emit_sz = 0;
+ break;
+ }
+
+ hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ hw_key.element[i].input_buffer = src_buffer;
+ hw_key.element[i].input_offset = src_offset;
+ hw_key.element[i].output_format = output_format;
+ hw_key.element[i].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
+ }
+
+ hw_key.nr_elements = vinfo->num_attribs;
+ hw_key.output_stride = vinfo->size * 4;
+
+ if (!emit->translate ||
+ translate_key_compare(&emit->translate->key, &hw_key) != 0)
+ {
+ translate_key_sanitize(&hw_key);
+ emit->translate = translate_cache_find(emit->cache, &hw_key);
+ }
+}
+
+
+void draw_pt_emit( struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count )
+{
+ struct draw_context *draw = emit->draw;
+ struct translate *translate = emit->translate;
+ struct vbuf_render *render = draw->render;
+ void *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ hw_verts = render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)vertex_count);
+ if (!hw_verts) {
+ assert(0);
+ return;
+ }
+
+ translate->set_buffer(translate,
+ 0,
+ vertex_data,
+ stride );
+
+ translate->set_buffer(translate,
+ 1,
+ &draw->rasterizer->point_size,
+ 0);
+
+ translate->run( translate,
+ 0,
+ vertex_count,
+ hw_verts );
+
+ render->draw(render,
+ elts,
+ count);
+
+ render->release_vertices(render,
+ hw_verts,
+ translate->key.output_stride,
+ vertex_count);
+}
+
+
+struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
+{
+ struct pt_emit *emit = CALLOC_STRUCT(pt_emit);
+ if (!emit)
+ return NULL;
+
+ emit->draw = draw;
+ emit->cache = translate_cache_create();
+ if (!emit->cache) {
+ FREE(emit);
+ return NULL;
+ }
+
+ return emit;
+}
+
+void draw_pt_emit_destroy( struct pt_emit *emit )
+{
+ if (emit->cache)
+ translate_cache_destroy(emit->cache);
+
+ FREE(emit);
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
new file mode 100644
index 0000000000..100117a9ae
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -0,0 +1,191 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+
+struct pt_fetch {
+ struct draw_context *draw;
+
+ struct translate *translate;
+
+ unsigned vertex_size;
+ boolean need_edgeflags;
+
+ struct translate_cache *cache;
+};
+
+/* Perform the fetch from API vertex elements & vertex buffers, to a
+ * contiguous set of float[4] attributes as required for the
+ * vertex_shader->run_linear() method.
+ *
+ * This is used in all cases except pure passthrough
+ * (draw_pt_fetch_emit.c) which has its own version to translate
+ * directly to hw vertices.
+ *
+ */
+void draw_pt_fetch_prepare( struct pt_fetch *fetch,
+ unsigned vertex_size )
+{
+ struct draw_context *draw = fetch->draw;
+ unsigned i, nr = 0;
+ unsigned dst_offset = 0;
+ struct translate_key key;
+
+ fetch->vertex_size = vertex_size;
+
+ /* Always emit/leave space for a vertex header.
+ *
+ * It's worth considering whether the vertex headers should contain
+ * a pointer to the 'data', rather than having it inline.
+ * Something to look at after we've fully switched over to the pt
+ * paths.
+ */
+ {
+ /* Need to set header->vertex_id = 0xffff somehow.
+ */
+ key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
+ key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
+ key.element[nr].input_offset = 0;
+ key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
+ key.element[nr].output_offset = dst_offset;
+ dst_offset += 1 * sizeof(float);
+ nr++;
+
+
+ /* Just leave the clip[] array untouched.
+ */
+ dst_offset += 4 * sizeof(float);
+ }
+
+
+ for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+ key.element[nr].input_format = draw->pt.vertex_element[i].src_format;
+ key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index;
+ key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset;
+ key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ key.element[nr].output_offset = dst_offset;
+
+ dst_offset += 4 * sizeof(float);
+ nr++;
+ }
+
+ assert(dst_offset <= vertex_size);
+
+ key.nr_elements = nr;
+ key.output_stride = vertex_size;
+
+
+ if (!fetch->translate ||
+ translate_key_compare(&fetch->translate->key, &key) != 0)
+ {
+ translate_key_sanitize(&key);
+ fetch->translate = translate_cache_find(fetch->cache, &key);
+
+ {
+ static struct vertex_header vh = { 0, 1, 0, 0xffff };
+ fetch->translate->set_buffer(fetch->translate,
+ draw->pt.nr_vertex_buffers,
+ &vh,
+ 0);
+ }
+ }
+
+ fetch->need_edgeflags = ((draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) &&
+ draw->pt.user.edgeflag);
+}
+
+
+
+
+void draw_pt_fetch_run( struct pt_fetch *fetch,
+ const unsigned *elts,
+ unsigned count,
+ char *verts )
+{
+ struct draw_context *draw = fetch->draw;
+ struct translate *translate = fetch->translate;
+ unsigned i;
+
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ translate->set_buffer(translate,
+ i,
+ ((char *)draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].pitch );
+ }
+
+ translate->run_elts( translate,
+ elts,
+ count,
+ verts );
+
+ /* Edgeflags are hard to fit into a translate program, populate
+ * them separately if required. In the setup above they are
+ * defaulted to one, so only need this if there is reason to change
+ * that default:
+ */
+ if (fetch->need_edgeflags) {
+ for (i = 0; i < count; i++) {
+ struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size);
+ vh->edgeflag = draw_pt_get_edgeflag( draw, elts[i] );
+ }
+ }
+}
+
+
+struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw )
+{
+ struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch);
+ if (!fetch)
+ return NULL;
+
+ fetch->draw = draw;
+ fetch->cache = translate_cache_create();
+ if (!fetch->cache) {
+ FREE(fetch);
+ return NULL;
+ }
+
+ return fetch;
+}
+
+void draw_pt_fetch_destroy( struct pt_fetch *fetch )
+{
+ translate_cache_destroy(fetch->cache);
+
+ FREE(fetch);
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 0806076956..b7b970a297 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -36,6 +36,8 @@
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
/* The simplest 'middle end' in the new vertex code.
*
@@ -72,107 +74,30 @@
struct fetch_emit_middle_end {
struct draw_pt_middle_end base;
struct draw_context *draw;
-
- struct {
- const ubyte *ptr;
- unsigned pitch;
- void (*fetch)( const void *from, float *attrib);
- void (*emit)( const float *attrib, float **out );
- } fetch[PIPE_MAX_ATTRIBS];
- unsigned nr_fetch;
- unsigned hw_vertex_size;
-};
-
-
-
-static void fetch_R32_FLOAT( const void *from,
- float *attrib )
-{
- float *f = (float *) from;
- attrib[0] = f[0];
- attrib[1] = 0.0;
- attrib[2] = 0.0;
- attrib[3] = 1.0;
-}
-
-
-static void emit_R32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out) += 1;
-}
-
-static void emit_R32G32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out) += 2;
-}
+ struct translate *translate;
+ const struct vertex_info *vinfo;
-static void emit_R32G32B32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out) += 3;
-}
+ /* Cache point size somewhere it's address won't change:
+ */
+ float point_size;
-static void emit_R32G32B32A32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out)[3] = attrib[3];
- (*out) += 4;
-}
+ struct translate_cache *cache;
+};
-/**
- * General-purpose fetch from user's vertex arrays, emit to driver's
- * vertex buffer.
- *
- * XXX this is totally temporary.
- */
-static void
-fetch_store_general( struct fetch_emit_middle_end *feme,
- void *out_ptr,
- const unsigned *fetch_elts,
- unsigned count )
-{
- float *out = (float *)out_ptr;
- struct vbuf_render *render = feme->draw->render;
- uint i, j;
-
- for (i = 0; i < count; i++) {
- unsigned elt = fetch_elts[i] & ~DRAW_PT_FLAG_MASK;
-
- for (j = 0; j < feme->nr_fetch; j++) {
- float attrib[4];
- const ubyte *from = (feme->fetch[j].ptr +
- feme->fetch[j].pitch * elt);
-
- feme->fetch[j].fetch( from, attrib );
- feme->fetch[j].emit( attrib, &out );
- }
- }
-}
-
static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
- unsigned prim )
+ unsigned prim,
+ unsigned opt )
{
- static const float zero = 0;
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
const struct vertex_info *vinfo;
- unsigned i;
+ unsigned i, dst_offset;
boolean ok;
+ struct translate_key key;
ok = draw->render->set_primitive( draw->render,
@@ -184,55 +109,93 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
/* Must do this after set_primitive() above:
*/
- vinfo = draw->render->get_vertex_info(draw->render);
+ vinfo = feme->vinfo = draw->render->get_vertex_info(draw->render);
+
+
- for (i = 0; i < vinfo->num_attribs; i++) {
- unsigned src_element = vinfo->src_index[i];
- unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index;
-
- feme->fetch[i].ptr = ((const ubyte *)draw->user.vbuffer[src_buffer] +
- draw->vertex_buffer[src_buffer].buffer_offset +
- draw->vertex_element[src_element].src_offset);
+ /* Transform from API vertices to HW vertices, skipping the
+ * pipeline_vertex intermediate step.
+ */
+ dst_offset = 0;
+ memset(&key, 0, sizeof(key));
- feme->fetch[i].pitch = draw->vertex_buffer[src_buffer].pitch;
-
- feme->fetch[i].fetch = draw_get_fetch_func(draw->vertex_element[src_element].src_format);
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]];
+ unsigned emit_sz = 0;
+ unsigned input_format = src->src_format;
+ unsigned input_buffer = src->vertex_buffer_index;
+ unsigned input_offset = src->src_offset;
+ unsigned output_format;
switch (vinfo->emit[i]) {
case EMIT_4F:
- feme->fetch[i].emit = emit_R32G32B32A32_FLOAT;
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
break;
case EMIT_3F:
- feme->fetch[i].emit = emit_R32G32B32_FLOAT;
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
break;
case EMIT_2F:
- feme->fetch[i].emit = emit_R32G32_FLOAT;
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
break;
case EMIT_1F:
- feme->fetch[i].emit = emit_R32_FLOAT;
- break;
- case EMIT_HEADER:
- feme->fetch[i].ptr = (const ubyte *)&zero;
- feme->fetch[i].pitch = 0;
- feme->fetch[i].fetch = fetch_R32_FLOAT;
- feme->fetch[i].emit = emit_R32_FLOAT;
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
break;
case EMIT_1F_PSIZE:
- feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size;
- feme->fetch[i].pitch = 0;
- feme->fetch[i].fetch = fetch_R32_FLOAT;
- feme->fetch[i].emit = emit_R32_FLOAT;
+ input_format = PIPE_FORMAT_R32_FLOAT;
+ input_buffer = draw->pt.nr_vertex_buffers;
+ input_offset = 0;
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
break;
default:
assert(0);
- feme->fetch[i].emit = NULL;
- break;
+ output_format = PIPE_FORMAT_NONE;
+ emit_sz = 0;
+ continue;
}
+
+ key.element[i].input_format = input_format;
+ key.element[i].input_buffer = input_buffer;
+ key.element[i].input_offset = input_offset;
+ key.element[i].output_format = output_format;
+ key.element[i].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
}
- feme->nr_fetch = vinfo->num_attribs;
- feme->hw_vertex_size = vinfo->size * 4;
+ key.nr_elements = vinfo->num_attribs;
+ key.output_stride = vinfo->size * 4;
+
+ /* Don't bother with caching at this stage:
+ */
+ if (!feme->translate ||
+ translate_key_compare(&feme->translate->key, &key) != 0)
+ {
+ translate_key_sanitize(&key);
+ feme->translate = translate_cache_find(feme->cache,
+ &key);
+
+
+ feme->translate->set_buffer(feme->translate,
+ draw->pt.nr_vertex_buffers,
+ &feme->point_size,
+ 0);
+ }
+
+ feme->point_size = draw->rasterizer->point_size;
+
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ feme->translate->set_buffer(feme->translate,
+ i,
+ ((char *)draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].pitch );
+ }
}
@@ -249,8 +212,12 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
struct draw_context *draw = feme->draw;
void *hw_verts;
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->hw_vertex_size,
+ (ushort)feme->translate->key.output_stride,
(ushort)fetch_count );
if (!hw_verts) {
assert(0);
@@ -260,10 +227,19 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
/* Single routine to fetch vertices and emit HW verts.
*/
- fetch_store_general( feme,
- hw_verts,
- fetch_elts,
- fetch_count );
+ feme->translate->run_elts( feme->translate,
+ fetch_elts,
+ fetch_count,
+ hw_verts );
+
+ if (0) {
+ unsigned i;
+ for (i = 0; i < fetch_count; i++) {
+ debug_printf("\n\nvertex %d:\n", i);
+ draw_dump_emitted_vertex( feme->vinfo,
+ (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i );
+ }
+ }
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
@@ -276,7 +252,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw->render->release_vertices( draw->render,
hw_verts,
- feme->hw_vertex_size,
+ feme->translate->key.output_stride,
fetch_count );
}
@@ -290,6 +266,10 @@ static void fetch_emit_finish( struct draw_pt_middle_end *middle )
static void fetch_emit_destroy( struct draw_pt_middle_end *middle )
{
+ struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+
+ translate_cache_destroy(feme->cache);
+
FREE(middle);
}
@@ -297,7 +277,15 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle )
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
{
struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end );
-
+ if (fetch_emit == NULL)
+ return NULL;
+
+ fetch_emit->cache = translate_cache_create();
+ if (!fetch_emit->cache) {
+ FREE(fetch_emit);
+ return NULL;
+ }
+
fetch_emit->base.prepare = fetch_emit_prepare;
fetch_emit->base.run = fetch_emit_run;
fetch_emit->base.finish = fetch_emit_finish;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c
deleted file mode 100644
index 4c2a281b29..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_vertex.h"
-#include "draw/draw_pt.h"
-
-/* The simplest 'middle end' in the new vertex code.
- *
- * The responsibilities of a middle end are to:
- * - perform vertex fetch using
- * - draw vertex element/buffer state
- * - a list of fetch indices we received as an input
- * - run the vertex shader
- * - cliptest,
- * - clip coord calculation
- * - viewport transformation
- * - if necessary, run the primitive pipeline, passing it:
- * - a linear array of vertex_header vertices constructed here
- * - a set of draw indices we received as an input
- * - otherwise, drive the hw backend,
- * - allocate space for hardware format vertices
- * - translate the vertex-shader output vertices to hw format
- * - calling the backend draw functions.
- *
- * For convenience, we provide a helper function to drive the hardware
- * backend given similar inputs to those required to run the pipeline.
- *
- * In the case of passthrough mode, many of these actions are disabled
- * or noops, so we end up doing:
- *
- * - perform vertex fetch
- * - drive the hw backend
- *
- * IE, basically just vertex fetch to post-vs-format vertices,
- * followed by a call to the backend helper function.
- */
-
-
-struct fetch_pipeline_middle_end {
- struct draw_pt_middle_end base;
- struct draw_context *draw;
-
- void (*header)( unsigned idx, float **out);
-
- struct {
- const ubyte *ptr;
- unsigned pitch;
- void (*fetch)( const void *from, float *attrib);
- void (*emit)( const float *attrib, float **out );
- } fetch[PIPE_MAX_ATTRIBS];
-
- unsigned nr_fetch;
- unsigned pipeline_vertex_size;
- unsigned prim;
-};
-
-
-
-static void emit_R32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out) += 1;
-}
-
-static void emit_R32G32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out) += 2;
-}
-
-static void emit_R32G32B32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out) += 3;
-}
-
-static void emit_R32G32B32A32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out)[3] = attrib[3];
- (*out) += 4;
-}
-
-static void header( unsigned idx,
- float **out )
-{
- struct vertex_header *header = (struct vertex_header *) (*out);
-
- header->clipmask = 0;
- header->edgeflag = 1;
- header->pad = 0;
- header->vertex_id = UNDEFINED_VERTEX_ID;
-
- (*out)[1] = 0;
- (*out)[2] = 0;
- (*out)[3] = 0;
- (*out)[3] = 1;
- (*out) += 5;
-}
-
-
-static void header_ef( unsigned idx,
- float **out )
-{
- struct vertex_header *header = (struct vertex_header *) (*out);
-
- /* XXX: need a reset_stipple flag in the vertex header too?
- */
- header->clipmask = 0;
- header->edgeflag = (idx & DRAW_PT_EDGEFLAG) != 0;
- header->pad = 0;
- header->vertex_id = UNDEFINED_VERTEX_ID;
-
- (*out)[1] = 0;
- (*out)[2] = 0;
- (*out)[3] = 0;
- (*out)[3] = 1;
- (*out) += 5;
-}
-
-
-/**
- * General-purpose fetch from user's vertex arrays, emit to driver's
- * vertex buffer.
- *
- * XXX this is totally temporary.
- */
-static void
-fetch_store_general( struct fetch_pipeline_middle_end *fpme,
- void *out_ptr,
- const unsigned *fetch_elts,
- unsigned count )
-{
- float *out = (float *)out_ptr;
- uint i, j;
-
- for (i = 0; i < count; i++) {
- unsigned elt = fetch_elts[i];
-
- fpme->header( elt, &out );
- elt &= ~DRAW_PT_FLAG_MASK;
-
- for (j = 0; j < fpme->nr_fetch; j++) {
- float attrib[4];
- const ubyte *from = (fpme->fetch[j].ptr +
- fpme->fetch[j].pitch * elt);
-
- fpme->fetch[j].fetch( from, attrib );
- fpme->fetch[j].emit( attrib, &out );
- }
- }
-}
-
-
-/* We aren't running a vertex shader, but are running the pipeline.
- * That means the vertices we need to build look like:
- *
- * dw0: vertex header (zero?)
- * dw1: clip coord 0
- * dw2: clip coord 1
- * dw3: clip coord 2
- * dw4: clip coord 4
- * dw5: screen coord 0
- * dw6: screen coord 0
- * dw7: screen coord 0
- * dw8: screen coord 0
- * dw9: other attribs...
- *
- */
-static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
- unsigned prim )
-{
- static const float zero = 0;
- struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- unsigned i, nr = 0;
-
- fpme->prim = prim;
-
- /* Emit the vertex header and empty clipspace coord field:
- */
- if (draw->user.edgeflag) {
- fpme->header = header_ef;
- }
- else {
- fpme->header = header;
- }
-
-
- /* Need to look at vertex shader inputs (we know it is a
- * passthrough shader, so these define the outputs too). If we
- * were running a shader, we'd still be looking at the inputs at
- * this point.
- */
- for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) {
- unsigned buf = draw->vertex_element[i].vertex_buffer_index;
- enum pipe_format format = draw->vertex_element[i].src_format;
-
- fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] +
- draw->vertex_buffer[buf].buffer_offset +
- draw->vertex_element[i].src_offset);
-
- fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch;
- fpme->fetch[nr].fetch = draw_get_fetch_func( format );
-
- /* Always do this -- somewhat redundant...
- */
- fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT;
- nr++;
- }
-
- fpme->nr_fetch = nr;
- fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
-}
-
-
-
-
-static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
- const unsigned *fetch_elts,
- unsigned fetch_count,
- const ushort *draw_elts,
- unsigned draw_count )
-{
- struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- char *pipeline_verts;
-
- pipeline_verts = MALLOC( fpme->pipeline_vertex_size *
- fetch_count );
- if (!pipeline_verts) {
- assert(0);
- return;
- }
-
-
- /* Single routine to fetch vertices and emit pipeline verts.
- */
- fetch_store_general( fpme,
- pipeline_verts,
- fetch_elts,
- fetch_count );
-
-
- /* Run the pipeline
- */
- draw_pt_run_pipeline( fpme->draw,
- fpme->prim,
- pipeline_verts,
- fpme->pipeline_vertex_size,
- fetch_count,
- draw_elts,
- draw_count );
-
-
- /* Done -- that was easy, wasn't it:
- */
- FREE( pipeline_verts );
-}
-
-
-
-static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
-{
- /* nothing to do */
-}
-
-static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
-{
- FREE(middle);
-}
-
-
-struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw )
-{
- struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end );
-
- fetch_pipeline->base.prepare = fetch_pipeline_prepare;
- fetch_pipeline->base.run = fetch_pipeline_run;
- fetch_pipeline->base.finish = fetch_pipeline_finish;
- fetch_pipeline->base.destroy = fetch_pipeline_destroy;
-
- fetch_pipeline->draw = draw;
-
- return &fetch_pipeline->base;
-}
-
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
new file mode 100644
index 0000000000..4ec20493c4
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -0,0 +1,235 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+#include "translate/translate.h"
+
+
+struct fetch_pipeline_middle_end {
+ struct draw_pt_middle_end base;
+ struct draw_context *draw;
+
+ struct pt_emit *emit;
+ struct pt_fetch *fetch;
+ struct pt_post_vs *post_vs;
+
+ unsigned vertex_data_offset;
+ unsigned vertex_size;
+ unsigned prim;
+ unsigned opt;
+};
+
+
+static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
+ unsigned prim,
+ unsigned opt )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *vs = draw->vertex_shader;
+
+ /* Add one to num_outputs because the pipeline occasionally tags on
+ * an additional texcoord, eg for AA lines.
+ */
+ unsigned nr = MAX2( vs->info.num_inputs,
+ vs->info.num_outputs + 1 );
+
+ fpme->prim = prim;
+ fpme->opt = opt;
+
+ /* Always leave room for the vertex header whether we need it or
+ * not. It's hard to get rid of it in particular because of the
+ * viewport code in draw_pt_post_vs.c.
+ */
+ fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
+
+
+
+ draw_pt_fetch_prepare( fpme->fetch,
+ fpme->vertex_size );
+
+ /* XXX: it's not really gl rasterization rules we care about here,
+ * but gl vs dx9 clip spaces.
+ */
+ draw_pt_post_vs_prepare( fpme->post_vs,
+ draw->bypass_clipping,
+ draw->identity_viewport,
+ draw->rasterizer->gl_rasterization_rules );
+
+
+ if (!(opt & PT_PIPELINE))
+ draw_pt_emit_prepare( fpme->emit,
+ prim );
+
+ /* No need to prepare the shader.
+ */
+ vs->prepare(vs, draw);
+
+}
+
+
+
+
+static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *shader = draw->vertex_shader;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align_int( fetch_count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
+ assert(0);
+ return;
+ }
+
+ /* Fetch into our vertex buffer
+ */
+ draw_pt_fetch_run( fpme->fetch,
+ fetch_elts,
+ fetch_count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, ie a bypass shader,
+ * then the inputs == outputs, and are already in the correct
+ * place.
+ */
+ if (opt & PT_SHADE)
+ {
+ shader->run_linear(shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.constants,
+ fetch_count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ }
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+ else {
+ draw_pt_emit( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+
+
+ FREE(pipeline_verts);
+}
+
+
+
+static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
+{
+ /* nothing to do */
+}
+
+static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+
+ if (fpme->fetch)
+ draw_pt_fetch_destroy( fpme->fetch );
+
+ if (fpme->emit)
+ draw_pt_emit_destroy( fpme->emit );
+
+ if (fpme->post_vs)
+ draw_pt_post_vs_destroy( fpme->post_vs );
+
+ FREE(middle);
+}
+
+
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw )
+{
+ struct fetch_pipeline_middle_end *fpme = CALLOC_STRUCT( fetch_pipeline_middle_end );
+ if (!fpme)
+ goto fail;
+
+ fpme->base.prepare = fetch_pipeline_prepare;
+ fpme->base.run = fetch_pipeline_run;
+ fpme->base.finish = fetch_pipeline_finish;
+ fpme->base.destroy = fetch_pipeline_destroy;
+
+ fpme->draw = draw;
+
+ fpme->fetch = draw_pt_fetch_create( draw );
+ if (!fpme->fetch)
+ goto fail;
+
+ fpme->post_vs = draw_pt_post_vs_create( draw );
+ if (!fpme->post_vs)
+ goto fail;
+
+ fpme->emit = draw_pt_emit_create( draw );
+ if (!fpme->emit)
+ goto fail;
+
+ return &fpme->base;
+
+ fail:
+ if (fpme)
+ fetch_pipeline_destroy( &fpme->base );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c
deleted file mode 100644
index e70e63d08f..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_vertex.h"
-#include "draw/draw_pt.h"
-
-
-/**
- * Add a point to the primitive queue.
- * \param i0 index into user's vertex arrays
- */
-static void do_point( struct draw_context *draw,
- const char *v0 )
-{
- struct prim_header prim;
-
- prim.reset_line_stipple = 0;
- prim.edgeflags = 1;
- prim.pad = 0;
- prim.v[0] = (struct vertex_header *)v0;
-
- draw->pipeline.first->point( draw->pipeline.first, &prim );
-}
-
-
-/**
- * Add a line to the primitive queue.
- * \param i0 index into user's vertex arrays
- * \param i1 index into user's vertex arrays
- */
-static void do_line( struct draw_context *draw,
- const char *v0,
- const char *v1 )
-{
- struct prim_header prim;
-
- prim.reset_line_stipple = 1; /* fixme */
- prim.edgeflags = 1;
- prim.pad = 0;
- prim.v[0] = (struct vertex_header *)v0;
- prim.v[1] = (struct vertex_header *)v1;
-
- draw->pipeline.first->line( draw->pipeline.first, &prim );
-}
-
-/**
- * Add a triangle to the primitive queue.
- */
-static void do_triangle( struct draw_context *draw,
- char *v0,
- char *v1,
- char *v2 )
-{
- struct prim_header prim;
-
- prim.v[0] = (struct vertex_header *)v0;
- prim.v[1] = (struct vertex_header *)v1;
- prim.v[2] = (struct vertex_header *)v2;
- prim.reset_line_stipple = 1;
- prim.edgeflags = ((prim.v[0]->edgeflag) |
- (prim.v[1]->edgeflag << 1) |
- (prim.v[2]->edgeflag << 2));
- prim.pad = 0;
-
- if (0) debug_printf("tri ef: %d %d %d\n",
- prim.v[0]->edgeflag,
- prim.v[1]->edgeflag,
- prim.v[2]->edgeflag);
-
- draw->pipeline.first->tri( draw->pipeline.first, &prim );
-}
-
-
-
-void draw_pt_reset_vertex_ids( struct draw_context *draw )
-{
- unsigned i;
- char *verts = draw->pt.pipeline.verts;
- unsigned stride = draw->pt.pipeline.vertex_stride;
-
- for (i = 0; i < draw->pt.pipeline.vertex_count; i++) {
- ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID;
- verts += stride;
- }
-}
-
-
-/* Code to run the pipeline on a fairly arbitary collection of vertices.
- *
- * Vertex headers must be pre-initialized with the
- * UNDEFINED_VERTEX_ID, this code will cause that id to become
- * overwritten, so it may have to be reset if there is the intention
- * to reuse the vertices.
- *
- * This code provides a callback to reset the vertex id's which the
- * draw_vbuf.c code uses when it has to perform a flush.
- */
-void draw_pt_run_pipeline( struct draw_context *draw,
- unsigned prim,
- char *verts,
- unsigned stride,
- unsigned vertex_count,
- const ushort *elts,
- unsigned count )
-{
- unsigned i;
-
- draw->pt.pipeline.verts = verts;
- draw->pt.pipeline.vertex_stride = stride;
- draw->pt.pipeline.vertex_count = vertex_count;
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i++)
- do_point( draw,
- verts + stride * elts[i] );
- break;
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2)
- do_line( draw,
- verts + stride * elts[i+0],
- verts + stride * elts[i+1]);
- break;
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3)
- do_triangle( draw,
- verts + stride * elts[i+0],
- verts + stride * elts[i+1],
- verts + stride * elts[i+2]);
- break;
- }
-
- draw->pt.pipeline.verts = NULL;
- draw->pt.pipeline.vertex_count = 0;
-}
-
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
new file mode 100644
index 0000000000..c4a67c8289
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -0,0 +1,227 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "pipe/p_context.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+
+struct pt_post_vs {
+ struct draw_context *draw;
+
+ boolean (*run)( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride );
+};
+
+
+
+static INLINE float
+dot4(const float *a, const float *b)
+{
+ return (a[0]*b[0] +
+ a[1]*b[1] +
+ a[2]*b[2] +
+ a[3]*b[3]);
+}
+
+
+
+static INLINE unsigned
+compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+ unsigned mask = 0x0;
+ unsigned i;
+
+#if 0
+ debug_printf("compute clipmask %f %f %f %f\n",
+ clip[0], clip[1], clip[2], clip[3]);
+ assert(clip[3] != 0.0);
+#endif
+
+ /* Do the hardwired planes first:
+ */
+ if (-clip[0] + clip[3] < 0) mask |= (1<<0);
+ if ( clip[0] + clip[3] < 0) mask |= (1<<1);
+ if (-clip[1] + clip[3] < 0) mask |= (1<<2);
+ if ( clip[1] + clip[3] < 0) mask |= (1<<3);
+ if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
+ if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
+
+ /* Followed by any remaining ones:
+ */
+ for (i = 6; i < nr; i++) {
+ if (dot4(clip, plane[i]) < 0)
+ mask |= (1<<i);
+ }
+
+ return mask;
+}
+
+
+/* The normal case - cliptest, rhw divide, viewport transform.
+ *
+ * Also handle identity viewport here at the expense of a few wasted
+ * instructions
+ */
+static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ struct vertex_header *out = vertices;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ unsigned clipped = 0;
+ unsigned j;
+
+ if (0) debug_printf("%s\n");
+
+ for (j = 0; j < count; j++) {
+ out->clip[0] = out->data[0][0];
+ out->clip[1] = out->data[0][1];
+ out->clip[2] = out->data[0][2];
+ out->clip[3] = out->data[0][3];
+
+ out->vertex_id = 0xffff;
+ out->clipmask = compute_clipmask_gl(out->clip,
+ pvs->draw->plane,
+ pvs->draw->nr_planes);
+ clipped += out->clipmask;
+
+ if (out->clipmask == 0)
+ {
+ /* divide by w */
+ float w = 1.0f / out->data[0][3];
+
+ /* Viewport mapping */
+ out->data[0][0] = out->data[0][0] * w * scale[0] + trans[0];
+ out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1];
+ out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2];
+ out->data[0][3] = w;
+#if 0
+ debug_printf("post viewport: %f %f %f %f\n",
+ out->data[0][0],
+ out->data[0][1],
+ out->data[0][2],
+ out->data[0][3]);
+#endif
+ }
+
+ out = (struct vertex_header *)( (char *)out + stride );
+ }
+
+ return clipped != 0;
+}
+
+
+
+/* If bypass_clipping is set, skip cliptest and rhw divide.
+ */
+static boolean post_vs_viewport( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ struct vertex_header *out = vertices;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ unsigned j;
+
+ if (0) debug_printf("%s\n", __FUNCTION__);
+ for (j = 0; j < count; j++) {
+ /* Viewport mapping only, no cliptest/rhw divide
+ */
+ out->data[0][0] = out->data[0][0] * scale[0] + trans[0];
+ out->data[0][1] = out->data[0][1] * scale[1] + trans[1];
+ out->data[0][2] = out->data[0][2] * scale[2] + trans[2];
+
+ out = (struct vertex_header *)((char *)out + stride);
+ }
+
+ return FALSE;
+}
+
+
+/* If bypass_clipping is set and we have an identity viewport, nothing
+ * to do.
+ */
+static boolean post_vs_none( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ if (0) debug_printf("%s\n", __FUNCTION__);
+ return FALSE;
+}
+
+boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
+ struct vertex_header *pipeline_verts,
+ unsigned count,
+ unsigned stride )
+{
+ return pvs->run( pvs, pipeline_verts, count, stride );
+}
+
+
+void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
+ boolean bypass_clipping,
+ boolean identity_viewport,
+ boolean opengl )
+{
+ if (bypass_clipping) {
+ if (identity_viewport)
+ pvs->run = post_vs_none;
+ else
+ pvs->run = post_vs_viewport;
+ }
+ else {
+ //if (opengl)
+ pvs->run = post_vs_cliptest_viewport_gl;
+ }
+}
+
+
+struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw )
+{
+ struct pt_post_vs *pvs = CALLOC_STRUCT( pt_post_vs );
+ if (!pvs)
+ return NULL;
+
+ pvs->draw = draw;
+
+ return pvs;
+}
+
+void draw_pt_post_vs_destroy( struct pt_post_vs *pvs )
+{
+ FREE(pvs);
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
new file mode 100644
index 0000000000..355093f945
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -0,0 +1,249 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+#define FETCH_MAX 256
+#define DRAW_MAX (16*FETCH_MAX)
+
+struct varray_frontend {
+ struct draw_pt_front_end base;
+ struct draw_context *draw;
+
+ ushort draw_elts[DRAW_MAX];
+ unsigned fetch_elts[FETCH_MAX];
+
+ unsigned draw_count;
+ unsigned fetch_count;
+
+ struct draw_pt_middle_end *middle;
+
+ unsigned input_prim;
+ unsigned output_prim;
+};
+
+static void varray_flush(struct varray_frontend *varray)
+{
+ if (varray->draw_count) {
+#if 0
+ debug_printf("FLUSH fc = %d, dc = %d\n",
+ varray->fetch_count,
+ varray->draw_count);
+#endif
+ varray->middle->run(varray->middle,
+ varray->fetch_elts,
+ varray->fetch_count,
+ varray->draw_elts,
+ varray->draw_count);
+ }
+
+ varray->fetch_count = 0;
+ varray->draw_count = 0;
+}
+
+#if 0
+static void varray_check_flush(struct varray_frontend *varray)
+{
+ if (varray->draw_count + 6 >= DRAW_MAX/* ||
+ varray->fetch_count + 4 >= FETCH_MAX*/) {
+ varray_flush(varray);
+ }
+}
+#endif
+
+static INLINE void add_draw_el(struct varray_frontend *varray,
+ int idx, ushort flags)
+{
+ varray->draw_elts[varray->draw_count++] = idx | flags;
+}
+
+
+static INLINE void varray_triangle( struct varray_frontend *varray,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2 )
+{
+ add_draw_el(varray, i0, 0);
+ add_draw_el(varray, i1, 0);
+ add_draw_el(varray, i2, 0);
+}
+
+static INLINE void varray_triangle_flags( struct varray_frontend *varray,
+ ushort flags,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2 )
+{
+ add_draw_el(varray, i0, flags);
+ add_draw_el(varray, i1, 0);
+ add_draw_el(varray, i2, 0);
+}
+
+static INLINE void varray_line( struct varray_frontend *varray,
+ unsigned i0,
+ unsigned i1 )
+{
+ add_draw_el(varray, i0, 0);
+ add_draw_el(varray, i1, 0);
+}
+
+
+static INLINE void varray_line_flags( struct varray_frontend *varray,
+ ushort flags,
+ unsigned i0,
+ unsigned i1 )
+{
+ add_draw_el(varray, i0, flags);
+ add_draw_el(varray, i1, 0);
+}
+
+
+static INLINE void varray_point( struct varray_frontend *varray,
+ unsigned i0 )
+{
+ add_draw_el(varray, i0, 0);
+}
+
+static INLINE void varray_quad( struct varray_frontend *varray,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2,
+ unsigned i3 )
+{
+ varray_triangle( varray, i0, i1, i3 );
+ varray_triangle( varray, i1, i2, i3 );
+}
+
+static INLINE void varray_ef_quad( struct varray_frontend *varray,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2,
+ unsigned i3 )
+{
+ const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
+ const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
+
+ varray_triangle_flags( varray,
+ DRAW_PIPE_RESET_STIPPLE | omitEdge1,
+ i0, i1, i3 );
+
+ varray_triangle_flags( varray,
+ omitEdge2,
+ i1, i2, i3 );
+}
+
+/* At least for now, we're back to using a template include file for
+ * this. The two paths aren't too different though - it may be
+ * possible to reunify them.
+ */
+#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle_flags(vc,flags,i0,i1,i2)
+#define QUAD(vc,i0,i1,i2,i3) varray_ef_quad(vc,i0,i1,i2,i3)
+#define LINE(vc,flags,i0,i1) varray_line_flags(vc,flags,i0,i1)
+#define POINT(vc,i0) varray_point(vc,i0)
+#define FUNC varray_run_extras
+#include "draw_pt_varray_tmp.h"
+
+#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle(vc,i0,i1,i2)
+#define QUAD(vc,i0,i1,i2,i3) varray_quad(vc,i0,i1,i2,i3)
+#define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1)
+#define POINT(vc,i0) varray_point(vc,i0)
+#define FUNC varray_run
+#include "draw_pt_varray_tmp.h"
+
+
+
+static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES
+};
+
+
+
+static void varray_prepare(struct draw_pt_front_end *frontend,
+ unsigned prim,
+ struct draw_pt_middle_end *middle,
+ unsigned opt)
+{
+ struct varray_frontend *varray = (struct varray_frontend *)frontend;
+
+ if (opt & PT_PIPELINE)
+ {
+ varray->base.run = varray_run_extras;
+ }
+ else
+ {
+ varray->base.run = varray_run;
+ }
+
+ varray->input_prim = prim;
+ varray->output_prim = reduced_prim[prim];
+
+ varray->middle = middle;
+ middle->prepare(middle, varray->output_prim, opt);
+}
+
+
+
+
+static void varray_finish(struct draw_pt_front_end *frontend)
+{
+ struct varray_frontend *varray = (struct varray_frontend *)frontend;
+ varray->middle->finish(varray->middle);
+ varray->middle = NULL;
+}
+
+static void varray_destroy(struct draw_pt_front_end *frontend)
+{
+ FREE(frontend);
+}
+
+
+struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
+{
+ struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
+ if (varray == NULL)
+ return NULL;
+
+ varray->base.prepare = varray_prepare;
+ varray->base.run = NULL;
+ varray->base.finish = varray_finish;
+ varray->base.destroy = varray_destroy;
+ varray->draw = draw;
+
+ return &varray->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
new file mode 100644
index 0000000000..b9a319b253
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -0,0 +1,144 @@
+
+static void FUNC(struct draw_pt_front_end *frontend,
+ pt_elt_func get_elt,
+ const void *elts,
+ unsigned count)
+{
+ struct varray_frontend *varray = (struct varray_frontend *)frontend;
+ struct draw_context *draw = varray->draw;
+ unsigned start = (unsigned)elts;
+
+ boolean flatfirst = (draw->rasterizer->flatshade &&
+ draw->rasterizer->flatshade_first);
+ unsigned i, flags;
+
+#if 0
+ debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count);
+#endif
+#if 0
+ debug_printf("INPUT PRIM = %d (start = %d, count = %d)\n", varray->input_prim,
+ start, count);
+#endif
+
+ for (i = 0; i < count; ++i) {
+ varray->fetch_elts[i] = start + i;
+ }
+ varray->fetch_count = count;
+
+ switch (varray->input_prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i ++) {
+ POINT(varray, i + 0);
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 0; i+1 < count; i += 2) {
+ LINE(varray, DRAW_PIPE_RESET_STIPPLE,
+ i + 0, i + 1);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ if (count >= 2) {
+ flags = DRAW_PIPE_RESET_STIPPLE;
+
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ LINE(varray, flags, i - 1, 0);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 0; i+2 < count; i += 3) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0, i + 1, i + 2);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0, i + 1 + (i&1), i + 2 - (i&1));
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0 + (i&1), i + 1 - (i&1), i + 2);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ if (flatfirst) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ }
+ else {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE(varray, flags, 0, i + 1, i + 2);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ for (i = 0; i+3 < count; i += 4) {
+ QUAD(varray, i + 0, i + 1, i + 2, i + 3);
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ for (i = 0; i+3 < count; i += 2) {
+ QUAD(varray, i + 2, i + 0, i + 1, i + 3);
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+
+ for (i = 0; i+2 < count; i++, flags = edge_middle) {
+
+ if (i + 3 == count)
+ flags |= edge_last;
+
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ varray_flush(varray);
+}
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 107dcfc269..6b3fb1406b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -33,8 +33,6 @@
#include "pipe/p_util.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
-//#include "draw/draw_vbuf.h"
-//#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
@@ -86,8 +84,9 @@ static void vcache_check_flush( struct vcache_frontend *vcache )
}
-static void vcache_elt( struct vcache_frontend *vcache,
- unsigned felt )
+static INLINE void vcache_elt( struct vcache_frontend *vcache,
+ unsigned felt,
+ ushort flags )
{
unsigned idx = felt % CACHE_MAX;
@@ -99,28 +98,9 @@ static void vcache_elt( struct vcache_frontend *vcache,
vcache->fetch_elts[vcache->fetch_count++] = felt;
}
- vcache->draw_elts[vcache->draw_count++] = vcache->out[idx];
+ vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
}
-static unsigned add_edgeflag( struct vcache_frontend *vcache,
- unsigned idx,
- unsigned mask )
-{
- if (mask && draw_get_edgeflag(vcache->draw, idx))
- return idx | DRAW_PT_EDGEFLAG;
- else
- return idx;
-}
-
-
-static unsigned add_reset_stipple( unsigned idx,
- unsigned reset )
-{
- if (reset)
- return idx | DRAW_PT_RESET_STIPPLE;
- else
- return idx;
-}
static void vcache_triangle( struct vcache_frontend *vcache,
@@ -128,48 +108,42 @@ static void vcache_triangle( struct vcache_frontend *vcache,
unsigned i1,
unsigned i2 )
{
- vcache_elt(vcache, i0 | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE);
- vcache_elt(vcache, i1 | DRAW_PT_EDGEFLAG);
- vcache_elt(vcache, i2 | DRAW_PT_EDGEFLAG);
+ vcache_elt(vcache, i0, 0);
+ vcache_elt(vcache, i1, 0);
+ vcache_elt(vcache, i2, 0);
vcache_check_flush(vcache);
}
-static void vcache_ef_triangle( struct vcache_frontend *vcache,
- boolean reset_stipple,
- unsigned ef_mask,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
+static void vcache_triangle_flags( struct vcache_frontend *vcache,
+ ushort flags,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2 )
{
- i0 = add_edgeflag( vcache, i0, (ef_mask >> 0) & 1 );
- i1 = add_edgeflag( vcache, i1, (ef_mask >> 1) & 1 );
- i2 = add_edgeflag( vcache, i2, (ef_mask >> 2) & 1 );
-
- i0 = add_reset_stipple( i0, reset_stipple );
-
- vcache_elt(vcache, i0);
- vcache_elt(vcache, i1);
- vcache_elt(vcache, i2);
+ vcache_elt(vcache, i0, flags);
+ vcache_elt(vcache, i1, 0);
+ vcache_elt(vcache, i2, 0);
vcache_check_flush(vcache);
-
- if (0) debug_printf("emit tri ef: %d %d %d\n",
- !!(i0 & DRAW_PT_EDGEFLAG),
- !!(i1 & DRAW_PT_EDGEFLAG),
- !!(i2 & DRAW_PT_EDGEFLAG));
-
}
-
static void vcache_line( struct vcache_frontend *vcache,
- boolean reset_stipple,
unsigned i0,
unsigned i1 )
{
- i0 = add_reset_stipple( i0, reset_stipple );
+ vcache_elt(vcache, i0, 0);
+ vcache_elt(vcache, i1, 0);
+ vcache_check_flush(vcache);
+}
+
- vcache_elt(vcache, i0);
- vcache_elt(vcache, i1);
+static void vcache_line_flags( struct vcache_frontend *vcache,
+ ushort flags,
+ unsigned i0,
+ unsigned i1 )
+{
+ vcache_elt(vcache, i0, flags);
+ vcache_elt(vcache, i1, 0);
vcache_check_flush(vcache);
}
@@ -177,7 +151,7 @@ static void vcache_line( struct vcache_frontend *vcache,
static void vcache_point( struct vcache_frontend *vcache,
unsigned i0 )
{
- vcache_elt(vcache, i0);
+ vcache_elt(vcache, i0, 0);
vcache_check_flush(vcache);
}
@@ -197,237 +171,36 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
unsigned i2,
unsigned i3 )
{
- const unsigned omitEdge2 = ~(1 << 1);
- const unsigned omitEdge3 = ~(1 << 2);
- vcache_ef_triangle( vcache, 1, omitEdge2, i0, i1, i3 );
- vcache_ef_triangle( vcache, 0, omitEdge3, i1, i2, i3 );
-}
+ const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
+ const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
+ vcache_triangle_flags( vcache,
+ DRAW_PIPE_RESET_STIPPLE | omitEdge1,
+ i0, i1, i3 );
+ vcache_triangle_flags( vcache,
+ omitEdge2,
+ i1, i2, i3 );
+}
+/* At least for now, we're back to using a template include file for
+ * this. The two paths aren't too different though - it may be
+ * possible to reunify them.
+ */
+#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2)
+#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3)
+#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1)
+#define POINT(vc,i0) vcache_point(vc,i0)
+#define FUNC vcache_run_extras
+#include "draw_pt_vcache_tmp.h"
+
+#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2)
+#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3)
+#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1)
+#define POINT(vc,i0) vcache_point(vc,i0)
+#define FUNC vcache_run
+#include "draw_pt_vcache_tmp.h"
-static void vcache_run( struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- unsigned count )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- struct draw_context *draw = vcache->draw;
-
- boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL);
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i;
-
-// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count );
-
- switch (vcache->input_prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- vcache_point( vcache,
- get_elt(elts, i + 0) );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- vcache_line( vcache,
- TRUE,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1));
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- for (i = 1; i < count; i++) {
- vcache_line( vcache,
- i == 1, /* XXX: only if vb not split */
- get_elt(elts, i - 1),
- get_elt(elts, i ));
- }
-
- vcache_line( vcache,
- 0,
- get_elt(elts, count - 1),
- get_elt(elts, 0 ));
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- for (i = 1; i < count; i++) {
- vcache_line( vcache,
- i == 1,
- get_elt(elts, i - 1),
- get_elt(elts, i ));
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- if (unfilled) {
- for (i = 0; i+2 < count; i += 3) {
- vcache_ef_triangle( vcache,
- 1,
- ~0,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2 ));
- }
- }
- else {
- for (i = 0; i+2 < count; i += 3) {
- vcache_triangle( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2 ));
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- if (i & 1) {
- vcache_triangle( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 2),
- get_elt(elts, i + 1 ));
- }
- else {
- vcache_triangle( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2 ));
- }
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- if (i & 1) {
- vcache_triangle( vcache,
- get_elt(elts, i + 1),
- get_elt(elts, i + 0),
- get_elt(elts, i + 2 ));
- }
- else {
- vcache_triangle( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2 ));
- }
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- vcache_triangle( vcache,
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, 0 ));
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- vcache_triangle( vcache,
- get_elt(elts, 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2 ));
- }
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- if (unfilled) {
- for (i = 0; i+3 < count; i += 4) {
- vcache_ef_quad( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, i + 3));
- }
- }
- else {
- for (i = 0; i+3 < count; i += 4) {
- vcache_quad( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, i + 3));
- }
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- if (unfilled) {
- for (i = 0; i+3 < count; i += 2) {
- vcache_ef_quad( vcache,
- get_elt(elts, i + 2),
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 3));
- }
- }
- else {
- for (i = 0; i+3 < count; i += 2) {
- vcache_quad( vcache,
- get_elt(elts, i + 2),
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 3));
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- if (unfilled) {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const unsigned edge_first = (1<<2);
- const unsigned edge_middle = (1<<0);
- const unsigned edge_last = (1<<1);
-
- for (i = 0; i+2 < count; i++) {
- unsigned ef_mask = edge_middle;
-
- if (i == 0)
- ef_mask |= edge_first;
-
- if (i + 3 == count)
- ef_mask |= edge_last;
-
- vcache_ef_triangle( vcache,
- i == 0,
- ef_mask,
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, 0));
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- vcache_triangle( vcache,
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, 0));
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- vcache_flush( vcache );
-}
@@ -448,23 +221,25 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
static void vcache_prepare( struct draw_pt_front_end *frontend,
unsigned prim,
- struct draw_pt_middle_end *middle )
+ struct draw_pt_middle_end *middle,
+ unsigned opt )
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
-/*
- if (vcache->draw->rasterizer->flatshade_first)
- vcache->base.run = vcache_run_pv0;
- else
- vcache->base.run = vcache_run_pv2;
-*/
+ if (opt & PT_PIPELINE)
+ {
+ vcache->base.run = vcache_run_extras;
+ }
+ else
+ {
+ vcache->base.run = vcache_run;
+ }
- vcache->base.run = vcache_run;
vcache->input_prim = prim;
vcache->output_prim = reduced_prim[prim];
vcache->middle = middle;
- middle->prepare( middle, vcache->output_prim );
+ middle->prepare( middle, vcache->output_prim, opt );
}
@@ -486,6 +261,8 @@ static void vcache_destroy( struct draw_pt_front_end *frontend )
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
{
struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
+ if (vcache == NULL)
+ return NULL;
vcache->base.prepare = vcache_prepare;
vcache->base.run = NULL;
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
new file mode 100644
index 0000000000..cf9f394aa3
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -0,0 +1,174 @@
+
+
+static void FUNC( struct draw_pt_front_end *frontend,
+ pt_elt_func get_elt,
+ const void *elts,
+ unsigned count )
+{
+ struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
+ struct draw_context *draw = vcache->draw;
+
+ boolean flatfirst = (draw->rasterizer->flatshade &&
+ draw->rasterizer->flatshade_first);
+ unsigned i, flags;
+
+
+ switch (vcache->input_prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i ++) {
+ POINT( vcache,
+ get_elt(elts, i + 0) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 0; i+1 < count; i += 2) {
+ LINE( vcache,
+ DRAW_PIPE_RESET_STIPPLE,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ if (count >= 2) {
+ flags = DRAW_PIPE_RESET_STIPPLE;
+
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( vcache,
+ flags,
+ get_elt(elts, i - 1),
+ get_elt(elts, i ));
+ }
+
+ LINE( vcache,
+ flags,
+ get_elt(elts, i - 1),
+ get_elt(elts, 0 ));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( vcache,
+ flags,
+ get_elt(elts, i - 1),
+ get_elt(elts, i ));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 0; i+2 < count; i += 3) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2 ));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1 + (i&1)),
+ get_elt(elts, i + 2 - (i&1)));
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, i + 0 + (i&1)),
+ get_elt(elts, i + 1 - (i&1)),
+ get_elt(elts, i + 2 ));
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, 0 ));
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( vcache,
+ DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ get_elt(elts, 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2 ));
+ }
+ }
+ }
+ break;
+
+
+ case PIPE_PRIM_QUADS:
+ for (i = 0; i+3 < count; i += 4) {
+ QUAD( vcache,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, i + 3));
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ for (i = 0; i+3 < count; i += 2) {
+ QUAD( vcache,
+ get_elt(elts, i + 2),
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 3));
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+
+ for (i = 0; i+2 < count; i++, flags = edge_middle) {
+
+ if (i + 3 == count)
+ flags |= edge_last;
+
+ TRIANGLE( vcache,
+ flags,
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, 0));
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ vcache_flush( vcache );
+}
+
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c
index 970adc95e7..1446f785c5 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.c
+++ b/src/gallium/auxiliary/draw/draw_vertex.c
@@ -52,9 +52,6 @@ draw_compute_vertex_size(struct vertex_info *vinfo)
switch (vinfo->emit[i]) {
case EMIT_OMIT:
break;
- case EMIT_HEADER:
- vinfo->size += sizeof(struct vertex_header) / 4;
- break;
case EMIT_4UB:
/* fall-through */
case EMIT_1F_PSIZE:
@@ -71,12 +68,62 @@ draw_compute_vertex_size(struct vertex_info *vinfo)
case EMIT_4F:
vinfo->size += 4;
break;
- case EMIT_ALL:
- /* fall-through */
default:
assert(0);
}
}
+}
+
+
+void
+draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
+{
+ unsigned i, j;
- assert(vinfo->size * 4 <= MAX_VERTEX_SIZE);
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ j = vinfo->src_index[i];
+ switch (vinfo->emit[i]) {
+ case EMIT_OMIT:
+ debug_printf("EMIT_OMIT:");
+ break;
+ case EMIT_1F:
+ debug_printf("EMIT_1F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ debug_printf("EMIT_1F_PSIZE:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_2F:
+ debug_printf("EMIT_2F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_3F:
+ debug_printf("EMIT_3F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ data += sizeof(float);
+ break;
+ case EMIT_4F:
+ debug_printf("EMIT_4F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_4UB:
+ debug_printf("EMIT_4UB:\t");
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ break;
+ default:
+ assert(0);
+ }
+ debug_printf("\n");
+ }
+ debug_printf("\n");
}
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index abd2017ed3..6d8bac5138 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -47,8 +47,6 @@
*/
enum attrib_emit {
EMIT_OMIT, /**< don't emit the attribute */
- EMIT_ALL, /**< emit whole post-xform vertex, w/ header */
- EMIT_HEADER, /**< emit vertex_header struct (XXX temp?) */
EMIT_1F,
EMIT_1F_PSIZE, /**< insert constant point size */
EMIT_2F,
@@ -108,5 +106,7 @@ draw_emit_vertex_attr(struct vertex_info *vinfo,
extern void draw_compute_vertex_size(struct vertex_info *vinfo);
+void draw_dump_emitted_vertex(const struct vertex_info *vinfo,
+ const uint8_t *data);
#endif /* DRAW_VERTEX_H */
diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c
deleted file mode 100644
index c0248979e2..0000000000
--- a/src/gallium/auxiliary/draw/draw_vertex_cache.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-void draw_vertex_cache_invalidate( struct draw_context *draw )
-{
- assert(draw->pq.queue_nr == 0);
- assert(draw->vs.queue_nr == 0);
- assert(draw->vcache.referenced == 0);
-
- /* There's an error somewhere in the vcache code that requires this
- * memset. The bug is exposed in q3demo demo001, but probably
- * elsewhere as well. Will track it down later.
- */
- memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx));
-}
-
-
-/**
- * Check if vertex is in cache, otherwise add it. It won't go through
- * VS yet, not until there is a flush operation or the VS queue fills up.
- *
- * Note that cache entries are basically just two pointers: the first
- * an index into the user's vertex arrays, the second a location in
- * the vertex shader cache for the post-transformed vertex.
- *
- * \return pointer to location of (post-transformed) vertex header in the cache
- */
-static struct vertex_header *get_vertex( struct draw_context *draw,
- unsigned i )
-{
- unsigned slot = (i + (i>>5)) % VCACHE_SIZE;
-
- assert(slot < 32); /* so we don't exceed the bitfield size below */
-
- if (draw->vcache.referenced & (1<<slot))
- {
- /* Cache hit?
- */
- if (draw->vcache.idx[slot].in == i) {
-// _mesa_printf("HIT %d %d\n", slot, i);
- assert(draw->vcache.idx[slot].out < draw->vs.queue_nr);
- return draw->vs.queue[draw->vcache.idx[slot].out].vertex;
- }
-
- /* Otherwise a collision
- */
- slot = VCACHE_SIZE + draw->vcache.overflow++;
-// _mesa_printf("XXX %d --> %d\n", i, slot);
- }
-
- /* Deal with the cache miss:
- */
- {
- unsigned out;
-
- assert(slot < Elements(draw->vcache.idx));
-
-// _mesa_printf("NEW %d %d\n", slot, i);
- draw->vcache.idx[slot].in = i;
- draw->vcache.idx[slot].out = out = draw->vs.queue_nr++;
- draw->vcache.referenced |= (1 << slot);
-
-
- /* Add to vertex shader queue:
- */
- assert(draw->vs.queue_nr < VS_QUEUE_LENGTH);
-
- draw->vs.queue[out].elt = i;
- draw->vs.queue[out].vertex->clipmask = 0;
- draw->vs.queue[out].vertex->edgeflag = draw_get_edgeflag(draw, i);
- draw->vs.queue[out].vertex->pad = 0;
- draw->vs.queue[out].vertex->vertex_id = UNDEFINED_VERTEX_ID;
-
- /* Need to set the vertex's edge flag here. If we're being called
- * by do_ef_triangle(), that function needs edge flag info!
- */
-
- return draw->vs.queue[draw->vcache.idx[slot].out].vertex;
- }
-}
-
-
-static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw,
- unsigned i )
-{
- const unsigned *elts = (const unsigned *) draw->user.elts;
- return get_vertex( draw, elts[i] );
-}
-
-
-static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw,
- unsigned i )
-{
- const ushort *elts = (const ushort *) draw->user.elts;
- return get_vertex( draw, elts[i] );
-}
-
-
-static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw,
- unsigned i )
-{
- const ubyte *elts = (const ubyte *) draw->user.elts;
- return get_vertex( draw, elts[i] );
-}
-
-
-void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw )
-{
- unsigned i;
-
- for (i = 0; i < draw->vs.post_nr; i++)
- draw->vs.queue[i].vertex->vertex_id = UNDEFINED_VERTEX_ID;
-}
-
-
-void draw_vertex_cache_unreference( struct draw_context *draw )
-{
- draw->vcache.referenced = 0;
- draw->vcache.overflow = 0;
-}
-
-
-int draw_vertex_cache_check_space( struct draw_context *draw,
- unsigned nr_verts )
-{
- if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) {
- /* The vs queue is sized so that this can never happen:
- */
- assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH);
- return TRUE;
- }
- else
- return FALSE;
-}
-
-
-
-/**
- * Tell the drawing context about the index/element buffer to use
- * (ala glDrawElements)
- * If no element buffer is to be used (i.e. glDrawArrays) then this
- * should be called with eltSize=0 and elements=NULL.
- *
- * \param draw the drawing context
- * \param eltSize size of each element (1, 2 or 4 bytes)
- * \param elements the element buffer ptr
- */
-void
-draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize, void *elements )
-{
-// draw_statechange( draw );
-
- /* choose the get_vertex() function to use */
- switch (eltSize) {
- case 0:
- draw->vcache.get_vertex = get_vertex;
- break;
- case 1:
- draw->vcache.get_vertex = get_ubyte_elt_vertex;
- break;
- case 2:
- draw->vcache.get_vertex = get_ushort_elt_vertex;
- break;
- case 4:
- draw->vcache.get_vertex = get_uint_elt_vertex;
- break;
- default:
- assert(0);
- }
- draw->user.elts = elements;
- draw->user.eltSize = eltSize;
-}
-
diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c
deleted file mode 100644
index 9041041006..0000000000
--- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c
+++ /dev/null
@@ -1,528 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-#define DRAW_DBG 0
-
-
-/**
- * Fetch a float[4] vertex attribute from memory, doing format/type
- * conversion as needed.
- *
- * This is probably needed/dupliocated elsewhere, eg format
- * conversion, texture sampling etc.
- */
-#define FETCH_ATTRIB( NAME, SZ, CVT ) \
-static void \
-fetch_##NAME(const void *ptr, float *attrib) \
-{ \
- static const float defaults[4] = { 0,0,0,1 }; \
- int i; \
- \
- for (i = 0; i < SZ; i++) { \
- attrib[i] = CVT(i); \
- } \
- \
- for (; i < 4; i++) { \
- attrib[i] = defaults[i]; \
- } \
-}
-
-#define CVT_64_FLOAT(i) (float) ((double *) ptr)[i]
-#define CVT_32_FLOAT(i) ((float *) ptr)[i]
-
-#define CVT_8_USCALED(i) (float) ((unsigned char *) ptr)[i]
-#define CVT_16_USCALED(i) (float) ((unsigned short *) ptr)[i]
-#define CVT_32_USCALED(i) (float) ((unsigned int *) ptr)[i]
-
-#define CVT_8_SSCALED(i) (float) ((char *) ptr)[i]
-#define CVT_16_SSCALED(i) (float) ((short *) ptr)[i]
-#define CVT_32_SSCALED(i) (float) ((int *) ptr)[i]
-
-#define CVT_8_UNORM(i) (float) ((unsigned char *) ptr)[i] / 255.0f
-#define CVT_16_UNORM(i) (float) ((unsigned short *) ptr)[i] / 65535.0f
-#define CVT_32_UNORM(i) (float) ((unsigned int *) ptr)[i] / 4294967295.0f
-
-#define CVT_8_SNORM(i) (float) ((char *) ptr)[i] / 127.0f
-#define CVT_16_SNORM(i) (float) ((short *) ptr)[i] / 32767.0f
-#define CVT_32_SNORM(i) (float) ((int *) ptr)[i] / 2147483647.0f
-
-FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT )
-FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
-FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT )
-FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT )
-
-FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT )
-FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT )
-FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT )
-FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT )
-
-FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED )
-FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED )
-FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED )
-FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED )
-
-FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED )
-FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED )
-FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED )
-FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED )
-
-FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM )
-FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM )
-FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM )
-FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM )
-
-FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM )
-FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM )
-FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM )
-FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM )
-
-FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED )
-FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED )
-FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED )
-FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED )
-
-FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED )
-FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED )
-FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED )
-FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED )
-
-FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM )
-FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM )
-FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM )
-FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM )
-
-FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM )
-FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM )
-FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM )
-FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM )
-
-FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED )
-FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED )
-FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED )
-FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED )
-
-FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED )
-FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED )
-FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED )
-FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED )
-
-FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
-FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM )
-FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM )
-FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM )
-
-FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM )
-FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM )
-FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM )
-FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM )
-
-FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM )
-//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
-
-
-
-static void
-fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib)
-{
- attrib[2] = CVT_8_UNORM(0);
- attrib[1] = CVT_8_UNORM(1);
- attrib[0] = CVT_8_UNORM(2);
- attrib[3] = CVT_8_UNORM(3);
-}
-
-
-fetch_func draw_get_fetch_func( enum pipe_format format )
-{
-#if 0
- {
- char tmp[80];
- pf_sprint_name(tmp, format);
- debug_printf("%s: %s\n", __FUNCTION__, tmp);
- }
-#endif
-
- switch (format) {
- case PIPE_FORMAT_R64_FLOAT:
- return fetch_R64_FLOAT;
- case PIPE_FORMAT_R64G64_FLOAT:
- return fetch_R64G64_FLOAT;
- case PIPE_FORMAT_R64G64B64_FLOAT:
- return fetch_R64G64B64_FLOAT;
- case PIPE_FORMAT_R64G64B64A64_FLOAT:
- return fetch_R64G64B64A64_FLOAT;
-
- case PIPE_FORMAT_R32_FLOAT:
- return fetch_R32_FLOAT;
- case PIPE_FORMAT_R32G32_FLOAT:
- return fetch_R32G32_FLOAT;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return fetch_R32G32B32_FLOAT;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- return fetch_R32G32B32A32_FLOAT;
-
- case PIPE_FORMAT_R32_UNORM:
- return fetch_R32_UNORM;
- case PIPE_FORMAT_R32G32_UNORM:
- return fetch_R32G32_UNORM;
- case PIPE_FORMAT_R32G32B32_UNORM:
- return fetch_R32G32B32_UNORM;
- case PIPE_FORMAT_R32G32B32A32_UNORM:
- return fetch_R32G32B32A32_UNORM;
-
- case PIPE_FORMAT_R32_USCALED:
- return fetch_R32_USCALED;
- case PIPE_FORMAT_R32G32_USCALED:
- return fetch_R32G32_USCALED;
- case PIPE_FORMAT_R32G32B32_USCALED:
- return fetch_R32G32B32_USCALED;
- case PIPE_FORMAT_R32G32B32A32_USCALED:
- return fetch_R32G32B32A32_USCALED;
-
- case PIPE_FORMAT_R32_SNORM:
- return fetch_R32_SNORM;
- case PIPE_FORMAT_R32G32_SNORM:
- return fetch_R32G32_SNORM;
- case PIPE_FORMAT_R32G32B32_SNORM:
- return fetch_R32G32B32_SNORM;
- case PIPE_FORMAT_R32G32B32A32_SNORM:
- return fetch_R32G32B32A32_SNORM;
-
- case PIPE_FORMAT_R32_SSCALED:
- return fetch_R32_SSCALED;
- case PIPE_FORMAT_R32G32_SSCALED:
- return fetch_R32G32_SSCALED;
- case PIPE_FORMAT_R32G32B32_SSCALED:
- return fetch_R32G32B32_SSCALED;
- case PIPE_FORMAT_R32G32B32A32_SSCALED:
- return fetch_R32G32B32A32_SSCALED;
-
- case PIPE_FORMAT_R16_UNORM:
- return fetch_R16_UNORM;
- case PIPE_FORMAT_R16G16_UNORM:
- return fetch_R16G16_UNORM;
- case PIPE_FORMAT_R16G16B16_UNORM:
- return fetch_R16G16B16_UNORM;
- case PIPE_FORMAT_R16G16B16A16_UNORM:
- return fetch_R16G16B16A16_UNORM;
-
- case PIPE_FORMAT_R16_USCALED:
- return fetch_R16_USCALED;
- case PIPE_FORMAT_R16G16_USCALED:
- return fetch_R16G16_USCALED;
- case PIPE_FORMAT_R16G16B16_USCALED:
- return fetch_R16G16B16_USCALED;
- case PIPE_FORMAT_R16G16B16A16_USCALED:
- return fetch_R16G16B16A16_USCALED;
-
- case PIPE_FORMAT_R16_SNORM:
- return fetch_R16_SNORM;
- case PIPE_FORMAT_R16G16_SNORM:
- return fetch_R16G16_SNORM;
- case PIPE_FORMAT_R16G16B16_SNORM:
- return fetch_R16G16B16_SNORM;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- return fetch_R16G16B16A16_SNORM;
-
- case PIPE_FORMAT_R16_SSCALED:
- return fetch_R16_SSCALED;
- case PIPE_FORMAT_R16G16_SSCALED:
- return fetch_R16G16_SSCALED;
- case PIPE_FORMAT_R16G16B16_SSCALED:
- return fetch_R16G16B16_SSCALED;
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- return fetch_R16G16B16A16_SSCALED;
-
- case PIPE_FORMAT_R8_UNORM:
- return fetch_R8_UNORM;
- case PIPE_FORMAT_R8G8_UNORM:
- return fetch_R8G8_UNORM;
- case PIPE_FORMAT_R8G8B8_UNORM:
- return fetch_R8G8B8_UNORM;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- return fetch_R8G8B8A8_UNORM;
-
- case PIPE_FORMAT_R8_USCALED:
- return fetch_R8_USCALED;
- case PIPE_FORMAT_R8G8_USCALED:
- return fetch_R8G8_USCALED;
- case PIPE_FORMAT_R8G8B8_USCALED:
- return fetch_R8G8B8_USCALED;
- case PIPE_FORMAT_R8G8B8A8_USCALED:
- return fetch_R8G8B8A8_USCALED;
-
- case PIPE_FORMAT_R8_SNORM:
- return fetch_R8_SNORM;
- case PIPE_FORMAT_R8G8_SNORM:
- return fetch_R8G8_SNORM;
- case PIPE_FORMAT_R8G8B8_SNORM:
- return fetch_R8G8B8_SNORM;
- case PIPE_FORMAT_R8G8B8A8_SNORM:
- return fetch_R8G8B8A8_SNORM;
-
- case PIPE_FORMAT_R8_SSCALED:
- return fetch_R8_SSCALED;
- case PIPE_FORMAT_R8G8_SSCALED:
- return fetch_R8G8_SSCALED;
- case PIPE_FORMAT_R8G8B8_SSCALED:
- return fetch_R8G8B8_SSCALED;
- case PIPE_FORMAT_R8G8B8A8_SSCALED:
- return fetch_R8G8B8A8_SSCALED;
-
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- return fetch_A8R8G8B8_UNORM;
-
-
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- return fetch_B8G8R8A8_UNORM;
-
- case 0:
- return NULL; /* not sure why this is needed */
-
- default:
- /* This can get hit because draw-state-validation is too eager,
- and can jump in here validating stuff before the state tracker has set
- up everything.
- */
- /* assert(0); */
- return NULL;
- }
-}
-
-
-static void
-transpose_4x4( float *out, const float *in )
-{
- /* This can be achieved in 12 sse instructions, plus the final
- * stores I guess. This is probably a bit more than that - maybe
- * 32 or so?
- */
- out[0] = in[0]; out[1] = in[4]; out[2] = in[8]; out[3] = in[12];
- out[4] = in[1]; out[5] = in[5]; out[6] = in[9]; out[7] = in[13];
- out[8] = in[2]; out[9] = in[6]; out[10] = in[10]; out[11] = in[14];
- out[12] = in[3]; out[13] = in[7]; out[14] = in[11]; out[15] = in[15];
-}
-
-
-
-static void fetch_xyz_rgb( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
-{
- const unsigned *pitch = draw->vertex_fetch.pitch;
- const ubyte **src = draw->vertex_fetch.src_ptr;
- int i;
-
- assert(count <= 4);
-
-// debug_printf("%s\n", __FUNCTION__);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
-
- for (i = 0; i < 4; i++) {
- {
- const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
- float *out = &machine->Inputs[0].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
-
- {
- const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
- float *out = &machine->Inputs[1].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
- }
-}
-
-
-
-
-static void fetch_xyz_rgb_st( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
-{
- const unsigned *pitch = draw->vertex_fetch.pitch;
- const ubyte **src = draw->vertex_fetch.src_ptr;
- int i;
-
- assert(count <= 4);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
-
- for (i = 0; i < 4; i++) {
- {
- const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
- float *out = &machine->Inputs[0].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
-
- {
- const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
- float *out = &machine->Inputs[1].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
-
- {
- const float *in = (const float *)(src[2] + elts[i] * pitch[2]);
- float *out = &machine->Inputs[2].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = 0.0f;
- out[12] = 1.0f;
- }
- }
-}
-
-
-
-
-/**
- * Fetch vertex attributes for 'count' vertices.
- */
-static void generic_vertex_fetch( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
-{
- unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
- unsigned attr;
-
- assert(count <= 4);
-
-// debug_printf("%s %d\n", __FUNCTION__, count);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
- for (attr = 0; attr < nr_attrs; attr++) {
-
- const unsigned pitch = draw->vertex_fetch.pitch[attr];
- const ubyte *src = draw->vertex_fetch.src_ptr[attr];
- const fetch_func fetch = draw->vertex_fetch.fetch[attr];
- unsigned i;
- float p[4][4];
-
-
- /* Fetch four attributes for four vertices.
- *
- * Could fetch directly into AOS format, but this is meant to be
- * a prototype for an sse implementation, which would have
- * difficulties doing that.
- */
- for (i = 0; i < count; i++)
- fetch( src + elts[i] * pitch, p[i] );
-
- /* Be nice and zero out any missing vertices:
- */
- for ( ; i < 4; i++)
- p[i][0] = p[i][1] = p[i][2] = p[i][3] = 0;
-
- /* Transpose/swizzle into sse-friendly format. Currently
- * assuming that all vertex shader inputs are float[4], but this
- * isn't true -- if the vertex shader only wants tex0.xy, we
- * could optimize for that.
- *
- * To do so fully without codegen would probably require an
- * excessive number of fetch functions, but we could at least
- * minimize the transpose step:
- */
- transpose_4x4( (float *)&machine->Inputs[attr].xyzw[0].f[0], (float *)p );
- }
-}
-
-
-
-void draw_update_vertex_fetch( struct draw_context *draw )
-{
- unsigned nr_attrs, i;
-
-// debug_printf("%s\n", __FUNCTION__);
-
- /* this may happend during context init */
- if (!draw->vertex_shader)
- return;
-
- nr_attrs = draw->vertex_shader->info.num_inputs;
-
- for (i = 0; i < nr_attrs; i++) {
- unsigned buf = draw->vertex_element[i].vertex_buffer_index;
- enum pipe_format format = draw->vertex_element[i].src_format;
-
- draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] +
- draw->vertex_buffer[buf].buffer_offset +
- draw->vertex_element[i].src_offset;
-
- draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch;
- draw->vertex_fetch.fetch[i] = draw_get_fetch_func( format );
- }
-
- draw->vertex_fetch.nr_attrs = nr_attrs;
-
- draw->vertex_fetch.fetch_func = generic_vertex_fetch;
-
- switch (nr_attrs) {
- case 2:
- if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
- draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT)
- draw->vertex_fetch.fetch_func = fetch_xyz_rgb;
- break;
- case 3:
- if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
- draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
- draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT)
- draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st;
- break;
- default:
- break;
- }
-
-}
diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c
deleted file mode 100644
index 7bb34ace7a..0000000000
--- a/src/gallium/auxiliary/draw/draw_vf.c
+++ /dev/null
@@ -1,432 +0,0 @@
-/*
- * Copyright 2003 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keithw@tungstengraphics.com>
- */
-
-
-#include <stddef.h>
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_util.h"
-#include "rtasm/rtasm_execmem.h"
-
-#include "draw_vf.h"
-
-
-#define DRAW_VF_DBG 0
-
-
-static boolean match_fastpath( struct draw_vertex_fetch *vf,
- const struct draw_vf_fastpath *fp)
-{
- unsigned j;
-
- if (vf->attr_count != fp->attr_count)
- return FALSE;
-
- for (j = 0; j < vf->attr_count; j++)
- if (vf->attr[j].format != fp->attr[j].format ||
- vf->attr[j].inputsize != fp->attr[j].size ||
- vf->attr[j].vertoffset != fp->attr[j].offset)
- return FALSE;
-
- if (fp->match_strides) {
- if (vf->vertex_stride != fp->vertex_stride)
- return FALSE;
-
- for (j = 0; j < vf->attr_count; j++)
- if (vf->attr[j].inputstride != fp->attr[j].stride)
- return FALSE;
- }
-
- return TRUE;
-}
-
-static boolean search_fastpath_emit( struct draw_vertex_fetch *vf )
-{
- struct draw_vf_fastpath *fp = vf->fastpath;
-
- for ( ; fp ; fp = fp->next) {
- if (match_fastpath(vf, fp)) {
- vf->emit = fp->func;
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-void draw_vf_register_fastpath( struct draw_vertex_fetch *vf,
- boolean match_strides )
-{
- struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath);
- unsigned i;
-
- fastpath->vertex_stride = vf->vertex_stride;
- fastpath->attr_count = vf->attr_count;
- fastpath->match_strides = match_strides;
- fastpath->func = vf->emit;
- fastpath->attr = (struct draw_vf_attr_type *)
- MALLOC(vf->attr_count * sizeof(fastpath->attr[0]));
-
- for (i = 0; i < vf->attr_count; i++) {
- fastpath->attr[i].format = vf->attr[i].format;
- fastpath->attr[i].stride = vf->attr[i].inputstride;
- fastpath->attr[i].size = vf->attr[i].inputsize;
- fastpath->attr[i].offset = vf->attr[i].vertoffset;
- }
-
- fastpath->next = vf->fastpath;
- vf->fastpath = fastpath;
-}
-
-
-
-
-/***********************************************************************
- * Build codegen functions or return generic ones:
- */
-static void choose_emit_func( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *dest)
-{
- vf->emit = NULL;
-
- /* Does this match an existing (hardwired, codegen or known-bad)
- * fastpath?
- */
- if (search_fastpath_emit(vf)) {
- /* Use this result. If it is null, then it is already known
- * that the current state will fail for codegen and there is no
- * point trying again.
- */
- }
- else if (vf->codegen_emit) {
- vf->codegen_emit( vf );
- }
-
- if (!vf->emit) {
- draw_vf_generate_hardwired_emit(vf);
- }
-
- /* Otherwise use the generic version:
- */
- if (!vf->emit)
- vf->emit = draw_vf_generic_emit;
-
- vf->emit( vf, count, dest );
-}
-
-
-
-
-
-/***********************************************************************
- * Public entrypoints, mostly dispatch to the above:
- */
-
-
-
-static unsigned
-draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
- const struct draw_vf_attr_map *map,
- unsigned nr,
- unsigned vertex_stride )
-{
- unsigned offset = 0;
- unsigned i, j;
-
- assert(nr < PIPE_MAX_ATTRIBS);
-
- for (j = 0, i = 0; i < nr; i++) {
- const unsigned format = map[i].format;
- if (format == DRAW_EMIT_PAD) {
-#if (DRAW_VF_DBG)
- debug_printf("%d: pad %d, offset %d\n", i,
- map[i].offset, offset);
-#endif
-
- offset += map[i].offset;
-
- }
- else {
- vf->attr[j].attrib = map[i].attrib;
- vf->attr[j].format = format;
- vf->attr[j].insert = draw_vf_format_info[format].insert;
- vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize;
- vf->attr[j].vertoffset = offset;
- vf->attr[j].isconst = draw_vf_format_info[format].isconst;
- if(vf->attr[j].isconst)
- memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize);
-
-#if (DRAW_VF_DBG)
- debug_printf("%d: %s, offset %d\n", i,
- draw_vf_format_info[format].name,
- vf->attr[j].vertoffset);
-#endif
-
- offset += draw_vf_format_info[format].attrsize;
- j++;
- }
- }
-
- vf->attr_count = j;
- vf->vertex_stride = vertex_stride ? vertex_stride : offset;
- vf->emit = choose_emit_func;
-
- assert(vf->vertex_stride >= offset);
- return vf->vertex_stride;
-}
-
-
-void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
- const struct vertex_info *vinfo,
- float point_size )
-{
- unsigned i, j, k;
- struct draw_vf_attr *a = vf->attr;
- struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS];
- unsigned count = 0; /* for debug/sanity */
- unsigned nr_attrs = 0;
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- /* no-op */
- break;
- case EMIT_ALL: {
- /* just copy the whole vertex as-is to the vbuf */
- unsigned s = vinfo->size;
- assert(i == 0);
- assert(j == 0);
- /* copy the vertex header */
- /* XXX: we actually don't copy the header, just pad it */
- attrs[nr_attrs].attrib = 0;
- attrs[nr_attrs].format = DRAW_EMIT_PAD;
- attrs[nr_attrs].offset = offsetof(struct vertex_header, data);
- s -= offsetof(struct vertex_header, data)/4;
- count += offsetof(struct vertex_header, data)/4;
- nr_attrs++;
- /* copy the vertex data */
- for(k = 0; k < (s & ~0x3); k += 4) {
- attrs[nr_attrs].attrib = k/4;
- attrs[nr_attrs].format = DRAW_EMIT_4F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 4;
- }
- /* tail */
- /* XXX: actually, this shouldn't be needed */
- attrs[nr_attrs].attrib = k/4;
- attrs[nr_attrs].offset = 0;
- switch(s & 0x3) {
- case 0:
- break;
- case 1:
- attrs[nr_attrs].format = DRAW_EMIT_1F;
- nr_attrs++;
- count += 1;
- break;
- case 2:
- attrs[nr_attrs].format = DRAW_EMIT_2F;
- nr_attrs++;
- count += 2;
- break;
- case 3:
- attrs[nr_attrs].format = DRAW_EMIT_3F;
- nr_attrs++;
- count += 3;
- break;
- }
- break;
- }
- case EMIT_HEADER:
- /* XXX emit new DRAW_EMIT_HEADER attribute??? */
- attrs[nr_attrs].attrib = 0;
- attrs[nr_attrs].format = DRAW_EMIT_PAD;
- attrs[nr_attrs].offset = offsetof(struct vertex_header, data);
- count += offsetof(struct vertex_header, data)/4;
- nr_attrs++;
- break;
- case EMIT_1F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_1F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count++;
- break;
- case EMIT_1F_PSIZE:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_1F_CONST;
- attrs[nr_attrs].offset = 0;
- attrs[nr_attrs].data.f[0] = point_size;
- nr_attrs++;
- count++;
- break;
- case EMIT_2F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_2F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 2;
- break;
- case EMIT_3F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_3F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 3;
- break;
- case EMIT_4F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_4F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 4;
- break;
- case EMIT_4UB:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_4UB_4F_BGRA;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 1;
- break;
- default:
- assert(0);
- }
- }
-
- assert(count == vinfo->size);
-
- draw_vf_set_vertex_attributes(vf,
- attrs,
- nr_attrs,
- vinfo->size * sizeof(float) );
-
- for (j = 0; j < vf->attr_count; j++) {
- a[j].inputsize = 4;
- a[j].do_insert = a[j].insert[4 - 1];
- if(a[j].isconst) {
- a[j].inputptr = a[j].data;
- a[j].inputstride = 0;
- }
- }
-}
-
-
-#if 0
-/* Set attribute pointers, adjusted for start position:
- */
-void draw_vf_set_sources( struct draw_vertex_fetch *vf,
- GLvector4f * const sources[],
- unsigned start )
-{
- struct draw_vf_attr *a = vf->attr;
- unsigned j;
-
- for (j = 0; j < vf->attr_count; j++) {
- const GLvector4f *vptr = sources[a[j].attrib];
-
- if ((a[j].inputstride != vptr->stride) ||
- (a[j].inputsize != vptr->size))
- vf->emit = choose_emit_func;
-
- a[j].inputstride = vptr->stride;
- a[j].inputsize = vptr->size;
- a[j].do_insert = a[j].insert[vptr->size - 1];
- a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride;
- }
-}
-#endif
-
-
-/**
- * Emit a vertex to dest.
- */
-void draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
- struct vertex_header *vertex,
- void *dest )
-{
- struct draw_vf_attr *a = vf->attr;
- unsigned j;
-
- for (j = 0; j < vf->attr_count; j++) {
- if (!a[j].isconst) {
- a[j].inputptr = (uint8_t *)&vertex->data[a[j].attrib][0];
- a[j].inputstride = 0; /* XXX: one-vertex-max ATM */
- }
- }
-
- vf->emit( vf, 1, (uint8_t*) dest );
-}
-
-
-
-struct draw_vertex_fetch *draw_vf_create( void )
-{
- struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch);
- unsigned i;
-
- for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
- vf->attr[i].vf = vf;
-
- vf->identity[0] = 0.0;
- vf->identity[1] = 0.0;
- vf->identity[2] = 0.0;
- vf->identity[3] = 1.0;
-
- vf->codegen_emit = NULL;
-
-#ifdef USE_SSE_ASM
- if (!GETENV("GALLIUM_NO_CODEGEN"))
- vf->codegen_emit = draw_vf_generate_sse_emit;
-#endif
-
- return vf;
-}
-
-
-void draw_vf_destroy( struct draw_vertex_fetch *vf )
-{
- struct draw_vf_fastpath *fp, *tmp;
-
- for (fp = vf->fastpath ; fp ; fp = tmp) {
- tmp = fp->next;
- FREE(fp->attr);
-
- /* KW: At the moment, fp->func is constrained to be allocated by
- * rtasm_exec_alloc(), as the hardwired fastpaths in
- * t_vertex_generic.c are handled specially. It would be nice
- * to unify them, but this probably won't change until this
- * module gets another overhaul.
- */
- //rtasm_exec_free((void *) fp->func);
- FREE(fp);
- }
-
- vf->fastpath = NULL;
- FREE(vf);
-}
diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h
deleted file mode 100644
index 7555d1bd58..0000000000
--- a/src/gallium/auxiliary/draw/draw_vf.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Copyright 2008 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-/**
- * Vertex fetch/store/convert code. This functionality is used in two places:
- * 1. Vertex fetch/convert - to grab vertex data from incoming vertex
- * arrays and convert to format needed by vertex shaders.
- * 2. Vertex store/emit - to convert simple float[][4] vertex attributes
- * (which is the organization used throughout the draw/prim pipeline) to
- * hardware-specific formats and emit into hardware vertex buffers.
- *
- *
- * Authors:
- * Keith Whitwell <keithw@tungstengraphics.com>
- */
-
-#ifndef DRAW_VF_H
-#define DRAW_VF_H
-
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-
-#include "draw_vertex.h"
-#include "draw_private.h" /* for vertex_header */
-
-
-enum draw_vf_attr_format {
- DRAW_EMIT_1F,
- DRAW_EMIT_2F,
- DRAW_EMIT_3F,
- DRAW_EMIT_4F,
- DRAW_EMIT_3F_XYW, /**< for projective texture */
- DRAW_EMIT_1UB_1F, /**< for fog coordinate */
- DRAW_EMIT_3UB_3F_RGB, /**< for specular color */
- DRAW_EMIT_3UB_3F_BGR, /**< for specular color */
- DRAW_EMIT_4UB_4F_RGBA, /**< for color */
- DRAW_EMIT_4UB_4F_BGRA, /**< for color */
- DRAW_EMIT_4UB_4F_ARGB, /**< for color */
- DRAW_EMIT_4UB_4F_ABGR, /**< for color */
- DRAW_EMIT_1F_CONST,
- DRAW_EMIT_2F_CONST,
- DRAW_EMIT_3F_CONST,
- DRAW_EMIT_4F_CONST,
- DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */
- DRAW_EMIT_MAX
-};
-
-struct draw_vf_attr_map
-{
- /** Input attribute number */
- unsigned attrib;
-
- enum draw_vf_attr_format format;
-
- unsigned offset;
-
- /**
- * Constant data for DRAW_EMIT_*_CONST
- */
- union {
- uint8_t ub[4];
- float f[4];
- } data;
-};
-
-struct draw_vertex_fetch;
-
-
-
-#if 0
-unsigned
-draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
- const struct draw_vf_attr_map *map,
- unsigned nr,
- unsigned vertex_stride );
-#endif
-
-void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
- const struct vertex_info *vinfo,
- float point_size );
-
-#if 0
-void
-draw_vf_set_sources( struct draw_vertex_fetch *vf,
- GLvector4f * const attrib[],
- unsigned start );
-#endif
-
-void
-draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
- struct vertex_header *vertex,
- void *dest );
-
-struct draw_vertex_fetch *
-draw_vf_create( void );
-
-void
-draw_vf_destroy( struct draw_vertex_fetch *vf );
-
-
-
-/***********************************************************************
- * Internal functions and structs:
- */
-
-struct draw_vf_attr;
-
-typedef void (*draw_vf_extract_func)( const struct draw_vf_attr *a,
- float *out,
- const uint8_t *v );
-
-typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a,
- uint8_t *v,
- const float *in );
-
-typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *dest );
-
-
-
-/**
- * Describes how to convert/move a vertex attribute from a vertex
- * array to a vertex structure.
- */
-struct draw_vf_attr
-{
- struct draw_vertex_fetch *vf;
-
- unsigned format;
- unsigned inputsize;
- unsigned inputstride;
- unsigned vertoffset; /**< position of the attrib in the vertex struct */
-
- boolean isconst; /**< read from const data below */
- uint8_t data[16];
-
- unsigned attrib; /**< which vertex attrib (0=position, etc) */
- unsigned vertattrsize; /**< size of the attribute in bytes */
-
- uint8_t *inputptr;
- const draw_vf_insert_func *insert;
- draw_vf_insert_func do_insert;
- draw_vf_extract_func extract;
-};
-
-struct draw_vertex_fetch
-{
- struct draw_vf_attr attr[PIPE_MAX_ATTRIBS];
- unsigned attr_count;
- unsigned vertex_stride;
-
- draw_vf_emit_func emit;
-
- /* Parameters and constants for codegen:
- */
- float identity[4];
-
- struct draw_vf_fastpath *fastpath;
-
- void (*codegen_emit)( struct draw_vertex_fetch *vf );
-};
-
-
-struct draw_vf_attr_type {
- unsigned format;
- unsigned size;
- unsigned stride;
- unsigned offset;
-};
-
-/** XXX this could be moved into draw_vf.c */
-struct draw_vf_fastpath {
- unsigned vertex_stride;
- unsigned attr_count;
- boolean match_strides;
-
- struct draw_vf_attr_type *attr;
-
- draw_vf_emit_func func;
- struct draw_vf_fastpath *next;
-};
-
-
-void
-draw_vf_register_fastpath( struct draw_vertex_fetch *vtx,
- boolean match_strides );
-
-void
-draw_vf_generic_emit( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *v );
-
-void
-draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf );
-
-void
-draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf );
-
-
-/** XXX this type and function could probably be moved into draw_vf.c */
-struct draw_vf_format_info {
- const char *name;
- draw_vf_insert_func insert[4];
- const unsigned attrsize;
- const boolean isconst;
-};
-
-extern const struct draw_vf_format_info
-draw_vf_format_info[DRAW_EMIT_MAX];
-
-
-#endif
diff --git a/src/gallium/auxiliary/draw/draw_vf_generic.c b/src/gallium/auxiliary/draw/draw_vf_generic.c
deleted file mode 100644
index 7a60a9db9c..0000000000
--- a/src/gallium/auxiliary/draw/draw_vf_generic.c
+++ /dev/null
@@ -1,585 +0,0 @@
-
-/*
- * Copyright 2003 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keithw@tungstengraphics.com>
- */
-
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-
-#include "draw_vf.h"
-
-
-
-static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
-}
-
-static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = 1;
-}
-
-static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = 0;
- out[3] = 1;
-}
-
-static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = 0;
- out[2] = 0;
- out[3] = 1;
-}
-
-static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[3];
-}
-
-static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- (void) a; (void) v; (void) in;
- assert(0);
-}
-
-static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
-}
-
-static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = 0;
-}
-
-static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = 0;
- out[2] = 0;
-}
-
-
-static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
-}
-
-static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = 0;
-}
-
-static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
-}
-
-static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- (void) a; (void) v; (void) in;
-}
-
-static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
-}
-
-static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[2] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- v[1] = 0;
- v[2] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
-}
-
-static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[0] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- v[1] = 0;
- v[0] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
-}
-
-static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- v[3] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- v[2] = 0x00;
- v[3] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
-}
-
-static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- v[1] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- v[2] = 0x00;
- v[1] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
-}
-
-static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[2] = 0;
-}
-
-static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- v[1] = 0;
- v[2] = 0;
-}
-
-static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
-}
-
-static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[0] = 0;
-}
-
-static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- v[1] = 0;
- v[0] = 0;
-}
-
-
-static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
-}
-
-
-const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] =
-{
- { "1f",
- { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
- sizeof(float), FALSE },
-
- { "2f",
- { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
- 2 * sizeof(float), FALSE },
-
- { "3f",
- { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
- 3 * sizeof(float), FALSE },
-
- { "4f",
- { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
- 4 * sizeof(float), FALSE },
-
- { "3f_xyw",
- { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err,
- insert_3f_xyw_4 },
- 3 * sizeof(float), FALSE },
-
- { "1ub_1f",
- { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 },
- sizeof(uint8_t), FALSE },
-
- { "3ub_3f_rgb",
- { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3,
- insert_3ub_3f_rgb_3 },
- 3 * sizeof(uint8_t), FALSE },
-
- { "3ub_3f_bgr",
- { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3,
- insert_3ub_3f_bgr_3 },
- 3 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_rgba",
- { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3,
- insert_4ub_4f_rgba_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_bgra",
- { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3,
- insert_4ub_4f_bgra_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_argb",
- { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3,
- insert_4ub_4f_argb_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_abgr",
- { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3,
- insert_4ub_4f_abgr_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "1f_const",
- { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
- sizeof(float), TRUE },
-
- { "2f_const",
- { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
- 2 * sizeof(float), TRUE },
-
- { "3f_const",
- { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
- 3 * sizeof(float), TRUE },
-
- { "4f_const",
- { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
- 4 * sizeof(float), TRUE },
-
- { "pad",
- { NULL, NULL, NULL, NULL },
- 0, FALSE },
-
-};
-
-
-
-
-/***********************************************************************
- * Hardwired fastpaths for emitting whole vertices or groups of
- * vertices
- */
-#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \
-static void NAME( struct draw_vertex_fetch *vf, \
- unsigned count, \
- uint8_t *v ) \
-{ \
- struct draw_vf_attr *a = vf->attr; \
- unsigned i; \
- \
- for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \
- if (NR > 0) { \
- F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \
- a[0].inputptr += a[0].inputstride; \
- } \
- \
- if (NR > 1) { \
- F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \
- a[1].inputptr += a[1].inputstride; \
- } \
- \
- if (NR > 2) { \
- F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \
- a[2].inputptr += a[2].inputstride; \
- } \
- \
- if (NR > 3) { \
- F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \
- a[3].inputptr += a[3].inputstride; \
- } \
- \
- if (NR > 4) { \
- F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \
- a[4].inputptr += a[4].inputstride; \
- } \
- } \
-}
-
-
-#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \
- insert_null, insert_null, NAME)
-
-#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \
- insert_null, NAME)
-
-#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \
- insert_null, NAME)
-
-
-EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4)
-
-EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2)
-
-EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2)
-
-
-/* Use the codegen paths to select one of a number of hardwired
- * fastpaths.
- */
-void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf )
-{
- draw_vf_emit_func func = NULL;
-
- /* Does it fit a hardwired fastpath? Help! this is growing out of
- * control!
- */
- switch (vf->attr_count) {
- case 2:
- if (vf->attr[0].do_insert == insert_3f_3 &&
- vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
- func = emit_xyz3_rgba4;
- }
- break;
- case 3:
- if (vf->attr[2].do_insert == insert_2f_2) {
- if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
- if (vf->attr[0].do_insert == insert_4f_4)
- func = emit_xyzw4_rgba4_st2;
- }
- }
- break;
- case 4:
- if (vf->attr[2].do_insert == insert_2f_2 &&
- vf->attr[3].do_insert == insert_2f_2) {
- if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
- if (vf->attr[0].do_insert == insert_4f_4)
- func = emit_xyzw4_rgba4_st2_st2;
- }
- }
- break;
- }
-
- vf->emit = func;
-}
-
-/***********************************************************************
- * Generic (non-codegen) functions for whole vertices or groups of
- * vertices
- */
-
-void draw_vf_generic_emit( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *v )
-{
- struct draw_vf_attr *a = vf->attr;
- const unsigned attr_count = vf->attr_count;
- const unsigned stride = vf->vertex_stride;
- unsigned i, j;
-
- for (i = 0 ; i < count ; i++, v += stride) {
- for (j = 0; j < attr_count; j++) {
- float *in = (float *)a[j].inputptr;
- a[j].inputptr += a[j].inputstride;
- a[j].do_insert( &a[j], v + a[j].vertoffset, in );
- }
- }
-}
-
-
diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c
deleted file mode 100644
index aff4ffd985..0000000000
--- a/src/gallium/auxiliary/draw/draw_vf_sse.c
+++ /dev/null
@@ -1,613 +0,0 @@
-/*
- * Copyright 2003 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keithw@tungstengraphics.com>
- */
-
-
-#include "pipe/p_compiler.h"
-#include "util/u_simple_list.h"
-
-#include "draw_vf.h"
-
-
-#if defined(USE_SSE_ASM)
-
-#include "rtasm/rtasm_cpu.h"
-#include "rtasm/rtasm_x86sse.h"
-
-
-#define X 0
-#define Y 1
-#define Z 2
-#define W 3
-
-
-struct x86_program {
- struct x86_function func;
-
- struct draw_vertex_fetch *vf;
- boolean inputs_safe;
- boolean outputs_safe;
- boolean have_sse2;
-
- struct x86_reg identity;
- struct x86_reg chan0;
-};
-
-
-static struct x86_reg get_identity( struct x86_program *p )
-{
- return p->identity;
-}
-
-static void emit_load4f_4( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movups(&p->func, dest, arg0);
-}
-
-static void emit_load4f_3( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Have to jump through some hoops:
- *
- * c 0 0 0
- * c 0 0 1
- * 0 0 c 1
- * a b c 1
- */
- sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
- sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
- sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) );
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_load4f_2( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Initialize from identity, then pull in low two words:
- */
- sse_movups(&p->func, dest, get_identity(p));
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_load4f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Pull in low word, then swizzle in identity */
- sse_movss(&p->func, dest, arg0);
- sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
-}
-
-
-
-static void emit_load3f_3( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Over-reads by 1 dword - potential SEGV if input is a vertex
- * array.
- */
- if (p->inputs_safe) {
- sse_movups(&p->func, dest, arg0);
- }
- else {
- /* c 0 0 0
- * c c c c
- * a b c c
- */
- sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
- sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X));
- sse_movlps(&p->func, dest, arg0);
- }
-}
-
-static void emit_load3f_2( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- emit_load4f_2(p, dest, arg0);
-}
-
-static void emit_load3f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- emit_load4f_1(p, dest, arg0);
-}
-
-static void emit_load2f_2( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_load2f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- emit_load4f_1(p, dest, arg0);
-}
-
-static void emit_load1f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movss(&p->func, dest, arg0);
-}
-
-static void (*load[4][4])( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 ) = {
- { emit_load1f_1,
- emit_load1f_1,
- emit_load1f_1,
- emit_load1f_1 },
-
- { emit_load2f_1,
- emit_load2f_2,
- emit_load2f_2,
- emit_load2f_2 },
-
- { emit_load3f_1,
- emit_load3f_2,
- emit_load3f_3,
- emit_load3f_3 },
-
- { emit_load4f_1,
- emit_load4f_2,
- emit_load4f_3,
- emit_load4f_4 }
-};
-
-static void emit_load( struct x86_program *p,
- struct x86_reg dest,
- unsigned sz,
- struct x86_reg src,
- unsigned src_sz)
-{
- load[sz-1][src_sz-1](p, dest, src);
-}
-
-static void emit_store4f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movups(&p->func, dest, arg0);
-}
-
-static void emit_store3f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- if (p->outputs_safe) {
- /* Emit the extra dword anyway. This may hurt writecombining,
- * may cause other problems.
- */
- sse_movups(&p->func, dest, arg0);
- }
- else {
- /* Alternate strategy - emit two, shuffle, emit one.
- */
- sse_movlps(&p->func, dest, arg0);
- sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
- sse_movss(&p->func, x86_make_disp(dest,8), arg0);
- }
-}
-
-static void emit_store2f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_store1f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movss(&p->func, dest, arg0);
-}
-
-
-static void (*store[4])( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 ) =
-{
- emit_store1f,
- emit_store2f,
- emit_store3f,
- emit_store4f
-};
-
-static void emit_store( struct x86_program *p,
- struct x86_reg dest,
- unsigned sz,
- struct x86_reg temp )
-
-{
- store[sz-1](p, dest, temp);
-}
-
-static void emit_pack_store_4ub( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg temp )
-{
- /* Scale by 255.0
- */
- sse_mulps(&p->func, temp, p->chan0);
-
- if (p->have_sse2) {
- sse2_cvtps2dq(&p->func, temp, temp);
- sse2_packssdw(&p->func, temp, temp);
- sse2_packuswb(&p->func, temp, temp);
- sse_movss(&p->func, dest, temp);
- }
- else {
- struct x86_reg mmx0 = x86_make_reg(file_MMX, 0);
- struct x86_reg mmx1 = x86_make_reg(file_MMX, 1);
- sse_cvtps2pi(&p->func, mmx0, temp);
- sse_movhlps(&p->func, temp, temp);
- sse_cvtps2pi(&p->func, mmx1, temp);
- mmx_packssdw(&p->func, mmx0, mmx1);
- mmx_packuswb(&p->func, mmx0, mmx0);
- mmx_movd(&p->func, dest, mmx0);
- }
-}
-
-static int get_offset( const void *a, const void *b )
-{
- return (const char *)b - (const char *)a;
-}
-
-/* Not much happens here. Eventually use this function to try and
- * avoid saving/reloading the source pointers each vertex (if some of
- * them can fit in registers).
- */
-static void get_src_ptr( struct x86_program *p,
- struct x86_reg srcREG,
- struct x86_reg vfREG,
- struct draw_vf_attr *a )
-{
- struct draw_vertex_fetch *vf = p->vf;
- struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr));
-
- /* Load current a[j].inputptr
- */
- x86_mov(&p->func, srcREG, ptr_to_src);
-}
-
-static void update_src_ptr( struct x86_program *p,
- struct x86_reg srcREG,
- struct x86_reg vfREG,
- struct draw_vf_attr *a )
-{
- if (a->inputstride) {
- struct draw_vertex_fetch *vf = p->vf;
- struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr));
-
- /* add a[j].inputstride (hardcoded value - could just as easily
- * pull the stride value from memory each time).
- */
- x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride));
-
- /* save new value of a[j].inputptr
- */
- x86_mov(&p->func, ptr_to_src, srcREG);
- }
-}
-
-
-/* Lots of hardcoding
- *
- * EAX -- pointer to current output vertex
- * ECX -- pointer to current attribute
- *
- */
-static boolean build_vertex_emit( struct x86_program *p )
-{
- struct draw_vertex_fetch *vf = p->vf;
- unsigned j = 0;
-
- struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX);
- struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX);
- struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP);
- struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI);
- struct x86_reg temp = x86_make_reg(file_XMM, 0);
- uint8_t *fixup, *label;
-
- /* Push a few regs?
- */
- x86_push(&p->func, countEBP);
- x86_push(&p->func, vfESI);
-
-
- /* Get vertex count, compare to zero
- */
- x86_xor(&p->func, srcECX, srcECX);
- x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2));
- x86_cmp(&p->func, countEBP, srcECX);
- fixup = x86_jcc_forward(&p->func, cc_E);
-
- /* Initialize destination register.
- */
- x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3));
-
- /* Move argument 1 (vf) into a reg:
- */
- x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1));
-
-
- /* always load, needed or not:
- */
- sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0])));
-
- /* Note address for loop jump */
- label = x86_get_label(&p->func);
-
- /* Emit code for each of the attributes. Currently routes
- * everything through SSE registers, even when it might be more
- * efficient to stick with regular old x86. No optimization or
- * other tricks - enough new ground to cover here just getting
- * things working.
- */
- while (j < vf->attr_count) {
- struct draw_vf_attr *a = &vf->attr[j];
- struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset);
-
- /* Now, load an XMM reg from src, perhaps transform, then save.
- * Could be shortcircuited in specific cases:
- */
- switch (a->format) {
- case DRAW_EMIT_1F:
- case DRAW_EMIT_1F_CONST:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 1, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_2F:
- case DRAW_EMIT_2F_CONST:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 2, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_3F:
- case DRAW_EMIT_3F_CONST:
- /* Potentially the worst case - hardcode 2+1 copying:
- */
- if (0) {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 3, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- }
- else {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 2, temp);
- if (a->inputsize > 2) {
- emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1);
- emit_store(p, x86_make_disp(dest,8), 1, temp);
- }
- else {
- sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p));
- }
- update_src_ptr(p, srcECX, vfESI, a);
- }
- break;
- case DRAW_EMIT_4F:
- case DRAW_EMIT_4F_CONST:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 4, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_3F_XYW:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z));
- emit_store(p, dest, 3, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
-
- case DRAW_EMIT_1UB_1F:
- /* Test for PAD3 + 1UB:
- */
- if (j > 0 &&
- a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3)
- {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X));
- emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */
- update_src_ptr(p, srcECX, vfESI, a);
- }
- else {
- debug_printf("Can't emit 1ub %x %x %d\n",
- a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize );
- return FALSE;
- }
- break;
- case DRAW_EMIT_3UB_3F_RGB:
- case DRAW_EMIT_3UB_3F_BGR:
- /* Test for 3UB + PAD1:
- */
- if (j == vf->attr_count - 1 ||
- a[1].vertoffset >= a->vertoffset + 4) {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
- if (a->format == DRAW_EMIT_3UB_3F_BGR)
- sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- }
- /* Test for 3UB + 1UB:
- */
- else if (j < vf->attr_count - 1 &&
- a[1].format == DRAW_EMIT_1UB_1F &&
- a[1].vertoffset == a->vertoffset + 3) {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
- update_src_ptr(p, srcECX, vfESI, a);
-
- /* Make room for incoming value:
- */
- sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
-
- get_src_ptr(p, srcECX, vfESI, &a[1]);
- emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize);
- update_src_ptr(p, srcECX, vfESI, &a[1]);
-
- /* Rearrange and possibly do BGR conversion:
- */
- if (a->format == DRAW_EMIT_3UB_3F_BGR)
- sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
- else
- sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X));
-
- emit_pack_store_4ub(p, dest, temp);
- j++; /* NOTE: two attrs consumed */
- }
- else {
- debug_printf("Can't emit 3ub\n");
- }
- return FALSE; /* add this later */
- break;
-
- case DRAW_EMIT_4UB_4F_RGBA:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_4UB_4F_BGRA:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_4UB_4F_ARGB:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_4UB_4F_ABGR:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- default:
- debug_printf("unknown a[%d].format %d\n", j, a->format);
- return FALSE; /* catch any new opcodes */
- }
-
- /* Increment j by at least 1 - may have been incremented above also:
- */
- j++;
- }
-
- /* Next vertex:
- */
- x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride));
-
- /* decr count, loop if not zero
- */
- x86_dec(&p->func, countEBP);
- x86_test(&p->func, countEBP, countEBP);
- x86_jcc(&p->func, cc_NZ, label);
-
- /* Exit mmx state?
- */
- if (p->func.need_emms)
- mmx_emms(&p->func);
-
- /* Land forward jump here:
- */
- x86_fixup_fwd_jump(&p->func, fixup);
-
- /* Pop regs and return
- */
- x86_pop(&p->func, x86_get_base_reg(vfESI));
- x86_pop(&p->func, countEBP);
- x86_ret(&p->func);
-
- vf->emit = (draw_vf_emit_func)x86_get_func(&p->func);
- return TRUE;
-}
-
-
-
-void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
-{
- struct x86_program p;
-
- if (!rtasm_cpu_has_sse()) {
- vf->codegen_emit = NULL;
- return;
- }
-
- memset(&p, 0, sizeof(p));
-
- p.vf = vf;
- p.inputs_safe = 0; /* for now */
- p.outputs_safe = 1; /* for now */
- p.have_sse2 = rtasm_cpu_has_sse2();
- p.identity = x86_make_reg(file_XMM, 6);
- p.chan0 = x86_make_reg(file_XMM, 7);
-
- x86_init_func(&p.func);
-
- if (build_vertex_emit(&p)) {
- draw_vf_register_fastpath( vf, TRUE );
- }
- else {
- /* Note the failure so that we don't keep trying to codegen an
- * impossible state:
- */
- draw_vf_register_fastpath( vf, FALSE );
- x86_release_func(&p.func);
- }
-}
-
-#else
-
-void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
-{
- /* Dummy version for when USE_SSE_ASM not defined */
-}
-
-#endif
diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vs.c
index 133418baca..03fe00a951 100644
--- a/src/gallium/auxiliary/draw/draw_vertex_shader.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -39,51 +39,6 @@
-/**
- * Run the vertex shader on all vertices in the vertex queue.
- * Called by the draw module when the vertx cache needs to be flushed.
- */
-void
-draw_vertex_shader_queue_flush(struct draw_context *draw)
-{
- struct draw_vertex_shader *shader = draw->vertex_shader;
- unsigned i;
-
- assert(draw->vs.queue_nr != 0);
-
- /* XXX: do this on statechange:
- */
- shader->prepare( shader, draw );
-
-// fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr );
-
- /* run vertex shader on vertex cache entries, four per invokation */
- for (i = 0; i < draw->vs.queue_nr; i += 4) {
- struct vertex_header *dests[4];
- unsigned elts[4];
- int j, n = MIN2(4, draw->vs.queue_nr - i);
-
- for (j = 0; j < n; j++) {
- elts[j] = draw->vs.queue[i + j].elt;
- dests[j] = draw->vs.queue[i + j].vertex;
- }
-
- for ( ; j < 4; j++) {
- elts[j] = elts[0];
- dests[j] = draw->vs.queue[i + j].vertex;
- }
-
- assert(n > 0);
- assert(n <= 4);
-
- shader->run(shader, draw, elts, n, dests);
- }
-
- draw->vs.post_nr = draw->vs.queue_nr;
- draw->vs.queue_nr = 0;
-}
-
-
struct draw_vertex_shader *
draw_create_vertex_shader(struct draw_context *draw,
const struct pipe_shader_state *shader)
@@ -97,10 +52,8 @@ draw_create_vertex_shader(struct draw_context *draw,
vs = draw_create_vs_exec( draw, shader );
}
}
- assert(vs);
-
- tgsi_scan_shader(shader->tokens, &vs->info);
+ assert(vs);
return vs;
}
@@ -115,9 +68,6 @@ draw_bind_vertex_shader(struct draw_context *draw,
{
draw->vertex_shader = dvs;
draw->num_vs_outputs = dvs->info.num_outputs;
-
- tgsi_exec_machine_init(&draw->machine);
-
dvs->prepare( dvs, draw );
}
else {
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 4ee7e705e9..f9772b83b8 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -31,10 +31,43 @@
#ifndef DRAW_VS_H
#define DRAW_VS_H
-struct draw_vertex_shader;
+#include "draw_context.h"
+#include "draw_private.h"
+
+
struct draw_context;
struct pipe_shader_state;
+/**
+ * Private version of the compiled vertex_shader
+ */
+struct draw_vertex_shader {
+
+ /* This member will disappear shortly:
+ */
+ struct pipe_shader_state state;
+
+ struct tgsi_shader_info info;
+
+ void (*prepare)( struct draw_vertex_shader *shader,
+ struct draw_context *draw );
+
+ /* Run the shader - this interface will get cleaned up in the
+ * future:
+ */
+ void (*run_linear)( struct draw_vertex_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride );
+
+
+ void (*delete)( struct draw_vertex_shader * );
+};
+
+
struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
const struct pipe_shader_state *templ);
@@ -47,4 +80,8 @@ struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
+
+#define MAX_TGSI_VERTICES 4
+
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index c6e503686a..7a02f6334b 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -39,163 +39,120 @@
#include "draw_vs.h"
#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_scan.h"
-static INLINE unsigned
-compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
- unsigned mask = 0;
- unsigned i;
-
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
- if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
- if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
- if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
- if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
- if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
-
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
+struct exec_vertex_shader {
+ struct draw_vertex_shader base;
+ struct tgsi_exec_machine *machine;
+};
- return mask;
+static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs )
+{
+ return (struct exec_vertex_shader *)vs;
}
+/* Not required for run_linear.
+ */
static void
vs_exec_prepare( struct draw_vertex_shader *shader,
struct draw_context *draw )
{
+ struct exec_vertex_shader *evs = exec_vertex_shader(shader);
+
/* specify the vertex program to interpret/execute */
- tgsi_exec_machine_bind_shader(&draw->machine,
+ tgsi_exec_machine_bind_shader(evs->machine,
shader->state.tokens,
PIPE_MAX_SAMPLERS,
NULL /*samplers*/ );
- draw_update_vertex_fetch( draw );
}
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer the input vertex data
- * \param elts indexes of four input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of pointers to four output vertices
+
+
+/* Simplified vertex shader interface for the pt paths. Given the
+ * complexity of code-generating all the above operations together,
+ * it's time to try doing all the other stuff separately.
*/
static void
-vs_exec_run( struct draw_vertex_shader *shader,
- struct draw_context *draw,
- const unsigned *elts,
- unsigned count,
- struct vertex_header *vOut[] )
+vs_exec_run_linear( struct draw_vertex_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
{
- struct tgsi_exec_machine *machine = &draw->machine;
- unsigned int j;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
- assert(count <= 4);
- assert(draw->vertex_shader->info.output_semantic_name[0]
- == TGSI_SEMANTIC_POSITION);
-
- machine->Consts = (float (*)[4]) draw->user.constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- if (draw->rasterizer->bypass_vs) {
- /* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
- }
- else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
- }
+ struct exec_vertex_shader *evs = exec_vertex_shader(shader);
+ struct tgsi_exec_machine *machine = evs->machine;
+ unsigned int i, j;
+ unsigned slot;
+
+ machine->Consts = constants;
+
+ for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
+ unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
- draw->vertex_fetch.fetch_func( draw, machine, elts, count );
+ /* Swizzle inputs.
+ */
+ for (j = 0; j < max_vertices; j++) {
+#if 0
+ debug_printf("%d) Input vert:\n", i + j);
+ for (slot = 0; slot < shader->info.num_inputs; slot++) {
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ input[slot][0],
+ input[slot][1],
+ input[slot][2],
+ input[slot][3]);
+ }
+#endif
+
+ for (slot = 0; slot < shader->info.num_inputs; slot++) {
+ machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
+ machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
+ machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
+ machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
+ }
+
+ input = (const float (*)[4])((const char *)input + input_stride);
+ }
- if (!draw->rasterizer->bypass_vs) {
/* run interpreter */
tgsi_exec_machine_run( machine );
- }
-
- /* store machine results */
- for (j = 0; j < count; j++) {
- unsigned slot;
- float x, y, z, w;
- /* Handle attr[0] (position) specially:
- *
- * XXX: Computing the clipmask should be done in the vertex
- * program as a set of DP4 instructions appended to the
- * user-provided code.
+ /* Unswizzle all output results.
*/
- x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- if (!draw->rasterizer->bypass_clipping) {
- vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
- }
- else {
- vOut[j]->clipmask = 0;
- }
- vOut[j]->edgeflag = 1;
-
- if (!draw->identity_viewport) {
- /* Viewport mapping */
- vOut[j]->data[0][0] = x * scale[0] + trans[0];
- vOut[j]->data[0][1] = y * scale[1] + trans[1];
- vOut[j]->data[0][2] = z * scale[2] + trans[2];
- vOut[j]->data[0][3] = w;
- }
- else {
- vOut[j]->data[0][0] = x;
- vOut[j]->data[0][1] = y;
- vOut[j]->data[0][2] = z;
- vOut[j]->data[0][3] = w;
- }
-
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
- */
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
- }
-
-#if 0 /*DEBUG*/
- printf("Post xform vert:\n");
- for (slot = 0; slot < draw->num_vs_outputs; slot++) {
- printf("%d: %f %f %f %f\n", slot,
- vOut[j]->data[slot][0],
- vOut[j]->data[slot][1],
- vOut[j]->data[slot][2],
- vOut[j]->data[slot][3]);
- }
-#endif
-
-
- } /* loop over vertices */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < shader->info.num_outputs; slot++) {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+
+ }
+
+#if 0
+ debug_printf("%d) Post xform vert:\n", i + j);
+ for (slot = 0; slot < shader->info.num_outputs; slot++) {
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ output[slot][0],
+ output[slot][1],
+ output[slot][2],
+ output[slot][3]);
+ }
+#endif
+
+ output = (float (*)[4])((char *)output + output_stride);
+ }
+
+ }
}
+
static void
vs_exec_delete( struct draw_vertex_shader *dvs )
{
@@ -208,17 +165,24 @@ struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
const struct pipe_shader_state *state)
{
- struct draw_vertex_shader *vs = CALLOC_STRUCT( draw_vertex_shader );
- uint nt = tgsi_num_tokens(state->tokens);
+ struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader );
if (vs == NULL)
return NULL;
/* we make a private copy of the tokens */
- vs->state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0]));
- vs->prepare = vs_exec_prepare;
- vs->run = vs_exec_run;
- vs->delete = vs_exec_delete;
+ vs->base.state.tokens = tgsi_dup_tokens(state->tokens);
+ if (!vs->base.state.tokens) {
+ FREE(vs);
+ return NULL;
+ }
+
+ tgsi_scan_shader(state->tokens, &vs->base.info);
+
+ vs->base.prepare = vs_exec_prepare;
+ vs->base.run_linear = vs_exec_run_linear;
+ vs->base.delete = vs_exec_delete;
+ vs->machine = &draw->machine;
- return vs;
+ return &vs->base;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index c8268317ef..171da51dd5 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -1,8 +1,8 @@
/**************************************************************************
- *
+ *
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
/*
@@ -47,154 +47,42 @@
struct draw_llvm_vertex_shader {
struct draw_vertex_shader base;
struct gallivm_prog *llvm_prog;
+ struct tgsi_exec_machine *machine;
};
-static INLINE unsigned
-compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
- unsigned mask = 0;
- unsigned i;
-
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
- if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
- if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
- if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
- if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
- if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
-
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
-
- return mask;
-}
-
-
-
static void
vs_llvm_prepare( struct draw_vertex_shader *base,
struct draw_context *draw )
{
- draw_update_vertex_fetch( draw );
}
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer the input vertex data
- * \param elts indexes of four input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of pointers to four output vertices
- */
+
static void
-vs_llvm_run( struct draw_vertex_shader *base,
- struct draw_context *draw,
- const unsigned *elts,
- unsigned count,
- struct vertex_header *vOut[] )
+vs_llvm_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
{
- struct draw_llvm_vertex_shader *shader =
+ struct draw_llvm_vertex_shader *shader =
(struct draw_llvm_vertex_shader *)base;
- struct tgsi_exec_machine *machine = &draw->machine;
- unsigned int j;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
-
- assert(count <= 4);
- assert(draw->vertex_shader->state->output_semantic_name[0]
- == TGSI_SEMANTIC_POSITION);
-
- /* Consts does not require 16 byte alignment. */
- machine->Consts = (float (*)[4]) draw->user.constants;
-
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- if (draw->rasterizer->bypass_vs) {
- /* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
- }
- else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
- }
-
-
- draw->vertex_fetch.fetch_func( draw, machine, elts, count );
+ gallivm_cpu_vs_exec(shader->llvm_prog, shader->machine,
+ input, base->info.num_inputs, output, base->info.num_outputs,
+ constants, count, input_stride, output_stride);
+}
- if (!draw->rasterizer->bypass_vs) {
- /* run shader */
- gallivm_cpu_vs_exec(shader->llvm_prog,
- machine->Inputs,
- machine->Outputs,
- machine->Consts,
- machine->Temps);
- }
- /* store machine results */
- for (j = 0; j < count; j++) {
- unsigned slot;
- float x, y, z, w;
-
- x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- if (!draw->rasterizer->bypass_clipping) {
- vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
- }
- else {
- vOut[j]->clipmask = 0;
- }
- vOut[j]->edgeflag = 1;
-
- if (!draw->identity_viewport) {
- /* Viewport mapping */
- vOut[j]->data[0][0] = x * scale[0] + trans[0];
- vOut[j]->data[0][1] = y * scale[1] + trans[1];
- vOut[j]->data[0][2] = z * scale[2] + trans[2];
- vOut[j]->data[0][3] = w;
- }
- else {
- vOut[j]->data[0][0] = x;
- vOut[j]->data[0][1] = y;
- vOut[j]->data[0][2] = z;
- vOut[j]->data[0][3] = w;
- }
-
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
- */
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
- }
- } /* loop over vertices */
-}
static void
vs_llvm_delete( struct draw_vertex_shader *base )
{
- struct draw_llvm_vertex_shader *shader =
+ struct draw_llvm_vertex_shader *shader =
(struct draw_llvm_vertex_shader *)base;
/* Do something to free compiled shader:
@@ -212,23 +100,30 @@ draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ)
{
struct draw_llvm_vertex_shader *vs;
- uint nt = tgsi_num_tokens(templ->tokens);
vs = CALLOC_STRUCT( draw_llvm_vertex_shader );
- if (vs == NULL)
+ if (vs == NULL)
return NULL;
/* we make a private copy of the tokens */
- vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0]));
+ vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+ if (!vs->base.state.tokens) {
+ FREE(vs);
+ return NULL;
+ }
+
+ tgsi_scan_shader(vs->base.state.tokens, &vs->base.info);
+
vs->base.prepare = vs_llvm_prepare;
- vs->base.run = vs_llvm_run;
+ vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
+ vs->machine = &draw->machine;
{
struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
gallivm_ir_set_layout(ir, GALLIVM_SOA);
gallivm_ir_set_components(ir, 4);
- gallivm_ir_fill_from_tgsi(ir, vs->base.state->tokens);
+ gallivm_ir_fill_from_tgsi(ir, vs->base.state.tokens);
vs->llvm_prog = gallivm_ir_compile(ir);
gallivm_ir_delete(ir);
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index f40d65df08..e3f4e67472 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -41,173 +41,136 @@
#include "draw_private.h"
#include "draw_context.h"
+#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"
#include "tgsi/exec/tgsi_sse2.h"
#include "tgsi/util/tgsi_parse.h"
+#define SSE_MAX_VERTICES 4
+#define SSE_SWIZZLES 1
+#if SSE_SWIZZLES
+typedef void (XSTDCALL *codegen_function) (
+ const struct tgsi_exec_vector *input, /* 1 */
+ struct tgsi_exec_vector *output, /* 2 */
+ float (*constant)[4], /* 3 */
+ struct tgsi_exec_vector *temporary, /* 4 */
+ float (*immediates)[4], /* 5 */
+ const float (*aos_input)[4], /* 6 */
+ uint num_inputs, /* 7 */
+ uint input_stride, /* 8 */
+ float (*aos_output)[4], /* 9 */
+ uint num_outputs, /* 10 */
+ uint output_stride ); /* 11 */
+#else
typedef void (XSTDCALL *codegen_function) (
const struct tgsi_exec_vector *input,
struct tgsi_exec_vector *output,
float (*constant)[4],
- struct tgsi_exec_vector *temporary );
-
+ struct tgsi_exec_vector *temporary,
+ float (*immediates)[4] );
+#endif
struct draw_sse_vertex_shader {
struct draw_vertex_shader base;
struct x86_function sse2_program;
- codegen_function func;
-};
+ codegen_function func;
+
+ struct tgsi_exec_machine *machine;
-/* Should be part of the generated shader:
- */
-static INLINE unsigned
-compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
- unsigned mask = 0;
- unsigned i;
-
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
- if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
- if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
- if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
- if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
- if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
-
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
-
- return mask;
-}
+ float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
+};
static void
vs_sse_prepare( struct draw_vertex_shader *base,
struct draw_context *draw )
{
- draw_update_vertex_fetch( draw );
}
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer the input vertex data
- * \param elts indexes of four input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of pointers to four output vertices
+
+
+/* Simplified vertex shader interface for the pt paths. Given the
+ * complexity of code-generating all the above operations together,
+ * it's time to try doing all the other stuff separately.
*/
static void
-vs_sse_run( struct draw_vertex_shader *base,
- struct draw_context *draw,
- const unsigned *elts,
- unsigned count,
- struct vertex_header *vOut[] )
+vs_sse_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
{
struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
- struct tgsi_exec_machine *machine = &draw->machine;
- unsigned int j;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
- assert(count <= 4);
- assert(draw->vertex_shader->info.output_semantic_name[0]
- == TGSI_SEMANTIC_POSITION);
-
- /* Consts does not require 16 byte alignment. */
- machine->Consts = (float (*)[4]) draw->user.constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- if (draw->rasterizer->bypass_vs) {
- /* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
- }
- else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
- }
-
-
- /* Fetch vertices. This may at some point be integrated into the
- * compiled shader -- that would require a reorganization where
- * multiple versions of the compiled shader might exist,
- * specialized for each fetch state.
- */
- draw->vertex_fetch.fetch_func( draw, machine, elts, count );
+ struct tgsi_exec_machine *machine = shader->machine;
+ unsigned int i;
+ for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
+ unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
- if (!draw->rasterizer->bypass_vs) {
+#if SSE_SWIZZLES
/* run compiled shader
- */
+ */
shader->func(machine->Inputs,
- machine->Outputs,
- machine->Consts,
- machine->Temps );
- }
-
-
- /* XXX: Computing the clipmask and emitting results should be done
- * in the vertex program as a set of instructions appended to
- * the user-provided code.
- */
- for (j = 0; j < count; j++) {
- unsigned slot;
- float x, y, z, w;
-
- x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- if (!draw->rasterizer->bypass_clipping) {
- vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
+ machine->Outputs,
+ (float (*)[4])constants,
+ machine->Temps,
+ shader->immediates,
+ input,
+ base->info.num_inputs,
+ input_stride,
+ output,
+ base->info.num_outputs,
+ output_stride );
+
+ input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
+ output = (float (*)[4])((char *)output + output_stride * max_vertices);
+#else
+ unsigned int j, slot;
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
- }
- else {
- vOut[j]->clipmask = 0;
- }
- vOut[j]->edgeflag = 1;
-
- if (!draw->identity_viewport) {
- /* Viewport mapping */
- vOut[j]->data[0][0] = x * scale[0] + trans[0];
- vOut[j]->data[0][1] = y * scale[1] + trans[1];
- vOut[j]->data[0][2] = z * scale[2] + trans[2];
- vOut[j]->data[0][3] = w;
- }
- else {
- vOut[j]->data[0][0] = x;
- vOut[j]->data[0][1] = y;
- vOut[j]->data[0][2] = z;
- vOut[j]->data[0][3] = w;
+ /* Swizzle inputs.
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < base->info.num_inputs; slot++) {
+ machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
+ machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
+ machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
+ machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
+ }
+
+ input = (const float (*)[4])((const char *)input + input_stride);
}
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
+ /* run compiled shader
*/
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ shader->func(machine->Inputs,
+ machine->Outputs,
+ (float (*)[4])constants,
+ machine->Temps,
+ shader->immediates);
+
+ /* Unswizzle all output results.
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < base->info.num_outputs; slot++) {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ }
+
+ output = (float (*)[4])((char *)output + output_stride);
}
- }
+#endif
+ }
}
+
static void
vs_sse_delete( struct draw_vertex_shader *base )
{
@@ -225,9 +188,8 @@ draw_create_vs_sse(struct draw_context *draw,
const struct pipe_shader_state *templ)
{
struct draw_sse_vertex_shader *vs;
- uint nt = tgsi_num_tokens(templ->tokens);
- if (!draw->use_sse)
+ if (!rtasm_cpu_has_sse2())
return NULL;
vs = CALLOC_STRUCT( draw_sse_vertex_shader );
@@ -235,18 +197,27 @@ draw_create_vs_sse(struct draw_context *draw,
return NULL;
/* we make a private copy of the tokens */
- vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0]));
+ vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+ if (!vs->base.state.tokens)
+ goto fail;
+
+ tgsi_scan_shader(templ->tokens, &vs->base.info);
+
vs->base.prepare = vs_sse_prepare;
- vs->base.run = vs_sse_run;
+ vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
+ vs->machine = &draw->machine;
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
- &vs->sse2_program ))
+ &vs->sse2_program, vs->immediates, SSE_SWIZZLES ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
+ if (!vs->func) {
+ goto fail;
+ }
return &vs->base;
diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c
deleted file mode 100644
index d6bff110b4..0000000000
--- a/src/gallium/auxiliary/draw/draw_wide_prims.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-
-
-struct wide_stage {
- struct draw_stage stage;
-
- float half_line_width;
- float half_point_size;
-
- uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
- uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS];
- uint num_texcoords;
-
- int psize_slot;
-};
-
-
-
-static INLINE struct wide_stage *wide_stage( struct draw_stage *stage )
-{
- return (struct wide_stage *)stage;
-}
-
-
-static void passthrough_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-static void passthrough_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line(stage->next, header);
-}
-
-static void passthrough_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->tri(stage->next, header);
-}
-
-
-/**
- * Draw a wide line by drawing a quad (two triangles).
- * XXX need to disable polygon stipple.
- */
-static void wide_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- const struct wide_stage *wide = wide_stage(stage);
- const float half_width = wide->half_line_width;
-
- struct prim_header tri;
-
- struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
- struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
- struct vertex_header *v2 = dup_vert(stage, header->v[1], 2);
- struct vertex_header *v3 = dup_vert(stage, header->v[1], 3);
-
- float *pos0 = v0->data[0];
- float *pos1 = v1->data[0];
- float *pos2 = v2->data[0];
- float *pos3 = v3->data[0];
-
- const float dx = FABSF(pos0[0] - pos2[0]);
- const float dy = FABSF(pos0[1] - pos2[1]);
-
- /*
- * Draw wide line as a quad (two tris) by "stretching" the line along
- * X or Y.
- * We need to tweak coords in several ways to be conformant here.
- */
-
- if (dx > dy) {
- /* x-major line */
- pos0[1] = pos0[1] - half_width - 0.25f;
- pos1[1] = pos1[1] + half_width - 0.25f;
- pos2[1] = pos2[1] - half_width - 0.25f;
- pos3[1] = pos3[1] + half_width - 0.25f;
- if (pos0[0] < pos2[0]) {
- /* left to right line */
- pos0[0] -= 0.5f;
- pos1[0] -= 0.5f;
- pos2[0] -= 0.5f;
- pos3[0] -= 0.5f;
- }
- else {
- /* right to left line */
- pos0[0] += 0.5f;
- pos1[0] += 0.5f;
- pos2[0] += 0.5f;
- pos3[0] += 0.5f;
- }
- }
- else {
- /* y-major line */
- pos0[0] = pos0[0] - half_width + 0.25f;
- pos1[0] = pos1[0] + half_width + 0.25f;
- pos2[0] = pos2[0] - half_width + 0.25f;
- pos3[0] = pos3[0] + half_width + 0.25f;
- if (pos0[1] < pos2[1]) {
- /* top to bottom line */
- pos0[1] -= 0.5f;
- pos1[1] -= 0.5f;
- pos2[1] -= 0.5f;
- pos3[1] -= 0.5f;
- }
- else {
- /* bottom to top line */
- pos0[1] += 0.5f;
- pos1[1] += 0.5f;
- pos2[1] += 0.5f;
- pos3[1] += 0.5f;
- }
- }
-
- tri.det = header->det; /* only the sign matters */
- tri.v[0] = v0;
- tri.v[1] = v2;
- tri.v[2] = v3;
- stage->next->tri( stage->next, &tri );
-
- tri.v[0] = v0;
- tri.v[1] = v3;
- tri.v[2] = v1;
- stage->next->tri( stage->next, &tri );
-}
-
-
-/**
- * Set the vertex texcoords for sprite mode.
- * Coords may be left untouched or set to a right-side-up or upside-down
- * orientation.
- */
-static void set_texcoords(const struct wide_stage *wide,
- struct vertex_header *v, const float tc[4])
-{
- uint i;
- for (i = 0; i < wide->num_texcoords; i++) {
- if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) {
- uint j = wide->texcoord_slot[i];
- v->data[j][0] = tc[0];
- if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT)
- v->data[j][1] = 1.0f - tc[1];
- else
- v->data[j][1] = tc[1];
- v->data[j][2] = tc[2];
- v->data[j][3] = tc[3];
- }
- }
-}
-
-
-/* If there are lots of sprite points (and why wouldn't there be?) it
- * would probably be more sensible to change hardware setup to
- * optimize this rather than doing the whole thing in software like
- * this.
- */
-static void wide_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- const struct wide_stage *wide = wide_stage(stage);
- const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite;
- float half_size;
- float left_adj, right_adj;
-
- struct prim_header tri;
-
- /* four dups of original vertex */
- struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
- struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
- struct vertex_header *v2 = dup_vert(stage, header->v[0], 2);
- struct vertex_header *v3 = dup_vert(stage, header->v[0], 3);
-
- float *pos0 = v0->data[0];
- float *pos1 = v1->data[0];
- float *pos2 = v2->data[0];
- float *pos3 = v3->data[0];
-
- /* point size is either per-vertex or fixed size */
- if (wide->psize_slot >= 0) {
- half_size = 0.5f * header->v[0]->data[wide->psize_slot][0];
- }
- else {
- half_size = wide->half_point_size;
- }
-
- left_adj = -half_size; /* + 0.25f;*/
- right_adj = half_size; /* + 0.25f;*/
-
- pos0[0] += left_adj;
- pos0[1] -= half_size;
-
- pos1[0] += left_adj;
- pos1[1] += half_size;
-
- pos2[0] += right_adj;
- pos2[1] -= half_size;
-
- pos3[0] += right_adj;
- pos3[1] += half_size;
-
- if (sprite) {
- static const float tex00[4] = { 0, 0, 0, 1 };
- static const float tex01[4] = { 0, 1, 0, 1 };
- static const float tex11[4] = { 1, 1, 0, 1 };
- static const float tex10[4] = { 1, 0, 0, 1 };
- set_texcoords( wide, v0, tex00 );
- set_texcoords( wide, v1, tex01 );
- set_texcoords( wide, v2, tex10 );
- set_texcoords( wide, v3, tex11 );
- }
-
- tri.det = header->det; /* only the sign matters */
- tri.v[0] = v0;
- tri.v[1] = v2;
- tri.v[2] = v3;
- stage->next->tri( stage->next, &tri );
-
- tri.v[0] = v0;
- tri.v[1] = v3;
- tri.v[2] = v1;
- stage->next->tri( stage->next, &tri );
-}
-
-
-static void wide_first_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct wide_stage *wide = wide_stage(stage);
- struct draw_context *draw = stage->draw;
-
- wide->half_point_size = 0.5f * draw->rasterizer->point_size;
-
- /* XXX we won't know the real size if it's computed by the vertex shader! */
- if (draw->rasterizer->point_size > draw->wide_point_threshold) {
- stage->point = wide_point;
- }
- else {
- stage->point = passthrough_point;
- }
-
- if (draw->rasterizer->point_sprite) {
- /* find vertex shader texcoord outputs */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
- uint i, j = 0;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
- wide->texcoord_slot[j] = i;
- wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j];
- j++;
- }
- }
- wide->num_texcoords = j;
- }
-
- wide->psize_slot = -1;
-
- if (draw->rasterizer->point_size_per_vertex) {
- /* find PSIZ vertex output */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
- uint i;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
- wide->psize_slot = i;
- break;
- }
- }
- }
-
- stage->point( stage, header );
-}
-
-
-
-static void wide_first_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct wide_stage *wide = wide_stage(stage);
- struct draw_context *draw = stage->draw;
-
- wide->half_line_width = 0.5f * draw->rasterizer->line_width;
-
- if (draw->rasterizer->line_width != 1.0) {
- wide->stage.line = wide_line;
- }
- else {
- wide->stage.line = passthrough_line;
- }
-
- stage->line( stage, header );
-}
-
-
-static void wide_flush( struct draw_stage *stage, unsigned flags )
-{
- stage->line = wide_first_line;
- stage->point = wide_first_point;
- stage->next->flush( stage->next, flags );
-}
-
-
-static void wide_reset_stipple_counter( struct draw_stage *stage )
-{
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void wide_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-struct draw_stage *draw_wide_stage( struct draw_context *draw )
-{
- struct wide_stage *wide = CALLOC_STRUCT(wide_stage);
-
- draw_alloc_temp_verts( &wide->stage, 4 );
-
- wide->stage.draw = draw;
- wide->stage.next = NULL;
- wide->stage.point = wide_first_point;
- wide->stage.line = wide_first_line;
- wide->stage.tri = passthrough_tri;
- wide->stage.flush = wide_flush;
- wide->stage.reset_stipple_counter = wide_reset_stipple_counter;
- wide->stage.destroy = wide_destroy;
-
- return &wide->stage;
-}
diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile
index c24e19e062..c3f7bfba93 100644
--- a/src/gallium/auxiliary/gallivm/Makefile
+++ b/src/gallium/auxiliary/gallivm/Makefile
@@ -65,10 +65,14 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES)
gallivm_builtins.cpp: llvm_builtins.c
- clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin
+ (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@
+ rm temp1.bin
gallivmsoabuiltins.cpp: soabuiltins.c
- clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-module -o=$@ -f -for=shader -funcname=createSoaBuiltins
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin
+ (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@
+ rm temp2.bin
# Emacs tags
tags:
diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp
index b6f641a3f8..77900e342b 100644
--- a/src/gallium/auxiliary/gallivm/gallivm.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm.cpp
@@ -288,10 +288,7 @@ void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
std::cout << "Creating llvm from: " <<std::endl;
tgsi_dump(tokens, 0);
-
llvm::Module *mod = tgsi_to_llvmir(ir, tokens);
-
- //llvm::Module *mod = tgsi_to_llvm(ir, tokens);
ir->module = mod;
gallivm_ir_dump(ir, 0);
}
@@ -306,11 +303,11 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir)
{
struct gallivm_prog *prog =
(struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
-
+
std::cout << "Before optimizations:"<<std::endl;
ir->module->dump();
std::cout<<"-------------------------------"<<std::endl;
-
+
PassManager veri;
veri.add(createVerifierPass());
veri.run(*ir->module);
@@ -318,7 +315,7 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir)
prog->num_consts = ir->num_consts;
memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators));
prog->num_interp = ir->num_interp;
-
+
/* Run optimization passes over it */
PassManager passes;
passes.add(new TargetData(mod));
diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h
index b4d6555d2f..36a64a7747 100644
--- a/src/gallium/auxiliary/gallivm/gallivm.h
+++ b/src/gallium/auxiliary/gallivm/gallivm.h
@@ -90,10 +90,15 @@ void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix);
struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog);
struct gallivm_cpu_engine *gallivm_global_cpu_engine();
int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
- struct tgsi_exec_vector *inputs,
- struct tgsi_exec_vector *dests,
- float (*consts)[4],
- struct tgsi_exec_vector *temps);
+ struct tgsi_exec_machine *machine,
+ const float (*input)[4],
+ unsigned num_inputs,
+ float (*output)[4],
+ unsigned num_outputs,
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride);
int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
float x, float y,
float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
index 1796f0a177..0fc5c4ec5c 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
@@ -1,567 +1,140 @@
-// Generated by llvm2cpp - DO NOT MODIFY!
-
-
-Module* createGallivmBuiltins(Module *mod) {
-
-mod->setModuleIdentifier("shader");
-
-// Type Definitions
-ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(8), 25);
-
-PointerType* PointerTy_1 = PointerType::get(ArrayTy_0, 0);
-
-std::vector<const Type*>FuncTy_2_args;
-FuncTy_2_args.push_back(Type::FloatTy);
-FuncTy_2_args.push_back(Type::FloatTy);
-FunctionType* FuncTy_2 = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/FuncTy_2_args,
- /*isVarArg=*/false);
-
-PointerType* PointerTy_3 = PointerType::get(FuncTy_2, 0);
-
-VectorType* VectorTy_4 = VectorType::get(Type::FloatTy, 4);
-
-std::vector<const Type*>FuncTy_5_args;
-FuncTy_5_args.push_back(VectorTy_4);
-FunctionType* FuncTy_5 = FunctionType::get(
- /*Result=*/VectorTy_4,
- /*Params=*/FuncTy_5_args,
- /*isVarArg=*/false);
-
-std::vector<const Type*>FuncTy_6_args;
-FuncTy_6_args.push_back(VectorTy_4);
-FuncTy_6_args.push_back(VectorTy_4);
-FuncTy_6_args.push_back(VectorTy_4);
-FunctionType* FuncTy_6 = FunctionType::get(
- /*Result=*/VectorTy_4,
- /*Params=*/FuncTy_6_args,
- /*isVarArg=*/false);
-
-VectorType* VectorTy_7 = VectorType::get(IntegerType::get(32), 4);
-
-std::vector<const Type*>FuncTy_9_args;
-FunctionType* FuncTy_9 = FunctionType::get(
- /*Result=*/IntegerType::get(32),
- /*Params=*/FuncTy_9_args,
- /*isVarArg=*/true);
-
-PointerType* PointerTy_8 = PointerType::get(FuncTy_9, 0);
-
-PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8), 0);
-
-std::vector<const Type*>FuncTy_12_args;
-FuncTy_12_args.push_back(Type::FloatTy);
-FunctionType* FuncTy_12 = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/FuncTy_12_args,
- /*isVarArg=*/false);
-
-PointerType* PointerTy_11 = PointerType::get(FuncTy_12, 0);
-
-std::vector<const Type*>FuncTy_13_args;
-FuncTy_13_args.push_back(VectorTy_4);
-FunctionType* FuncTy_13 = FunctionType::get(
- /*Result=*/IntegerType::get(32),
- /*Params=*/FuncTy_13_args,
- /*isVarArg=*/false);
-
-
-// Function Declarations
-
-Function* func_approx = new Function(
- /*Type=*/FuncTy_2,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"approx", mod);
-func_approx->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_approx_PAL = 0;
-func_approx->setParamAttrs(func_approx_PAL);
-
-Function* func_powf = new Function(
- /*Type=*/FuncTy_2,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"powf", mod); // (external, no body)
-func_powf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_powf_PAL = 0;
-func_powf->setParamAttrs(func_powf_PAL);
-
-Function* func_lit = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"lit", mod);
-func_lit->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_lit_PAL = 0;
-func_lit->setParamAttrs(func_lit_PAL);
-
-Function* func_cmp = new Function(
- /*Type=*/FuncTy_6,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"cmp", mod);
-func_cmp->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_cmp_PAL = 0;
-{
- ParamAttrsVector Attrs;
- ParamAttrsWithIndex PAWI;
- PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind;
- Attrs.push_back(PAWI);
- func_cmp_PAL = ParamAttrsList::get(Attrs);
-
-}
-func_cmp->setParamAttrs(func_cmp_PAL);
-
-Function* func_vcos = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"vcos", mod);
-func_vcos->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_vcos_PAL = 0;
-func_vcos->setParamAttrs(func_vcos_PAL);
-
-Function* func_printf = new Function(
- /*Type=*/FuncTy_9,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"printf", mod); // (external, no body)
-func_printf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_printf_PAL = 0;
-func_printf->setParamAttrs(func_printf_PAL);
-
-Function* func_cosf = new Function(
- /*Type=*/FuncTy_12,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"cosf", mod); // (external, no body)
-func_cosf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_cosf_PAL = 0;
-func_cosf->setParamAttrs(func_cosf_PAL);
-
-Function* func_scs = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"scs", mod);
-func_scs->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_scs_PAL = 0;
-func_scs->setParamAttrs(func_scs_PAL);
-
-Function* func_sinf = new Function(
- /*Type=*/FuncTy_12,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"sinf", mod); // (external, no body)
-func_sinf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_sinf_PAL = 0;
-func_sinf->setParamAttrs(func_sinf_PAL);
-
-Function* func_vsin = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"vsin", mod);
-func_vsin->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_vsin_PAL = 0;
-func_vsin->setParamAttrs(func_vsin_PAL);
-
-Function* func_kilp = new Function(
- /*Type=*/FuncTy_13,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"kilp", mod);
-func_kilp->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_kilp_PAL = 0;
-{
- ParamAttrsVector Attrs;
- ParamAttrsWithIndex PAWI;
- PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind;
- Attrs.push_back(PAWI);
- func_kilp_PAL = ParamAttrsList::get(Attrs);
-
-}
-func_kilp->setParamAttrs(func_kilp_PAL);
-
-// Global Variable Declarations
-
-
-GlobalVariable* gvar_array__str = new GlobalVariable(
-/*Type=*/ArrayTy_0,
-/*isConstant=*/true,
-/*Linkage=*/GlobalValue::InternalLinkage,
-/*Initializer=*/0, // has initializer, specified below
-/*Name=*/".str",
-mod);
-
-GlobalVariable* gvar_array__str1 = new GlobalVariable(
-/*Type=*/ArrayTy_0,
-/*isConstant=*/true,
-/*Linkage=*/GlobalValue::InternalLinkage,
-/*Initializer=*/0, // has initializer, specified below
-/*Name=*/".str1",
-mod);
-
-// Constant Definitions
-Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true);
-Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true);
-ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f));
-ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f));
-Constant* const_float_18 = Constant::getNullValue(Type::FloatTy);
-Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32));
-std::vector<Constant*> const_packed_20_elems;
-ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
-const_packed_20_elems.push_back(const_float_21);
-UndefValue* const_float_22 = UndefValue::get(Type::FloatTy);
-const_packed_20_elems.push_back(const_float_22);
-const_packed_20_elems.push_back(const_float_22);
-const_packed_20_elems.push_back(const_float_21);
-Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems);
-ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10));
-ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10));
-ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10));
-std::vector<Constant*> const_packed_26_elems;
-const_packed_26_elems.push_back(const_float_21);
-const_packed_26_elems.push_back(const_float_18);
-const_packed_26_elems.push_back(const_float_18);
-const_packed_26_elems.push_back(const_float_21);
-Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems);
-Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy);
-std::vector<Constant*> const_packed_28_elems;
-const_packed_28_elems.push_back(const_int32_19);
-ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10));
-const_packed_28_elems.push_back(const_int32_29);
-const_packed_28_elems.push_back(const_int32_25);
-const_packed_28_elems.push_back(const_int32_24);
-Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems);
-std::vector<Constant*> const_packed_30_elems;
-const_packed_30_elems.push_back(const_int32_19);
-const_packed_30_elems.push_back(const_int32_23);
-ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10));
-const_packed_30_elems.push_back(const_int32_31);
-const_packed_30_elems.push_back(const_int32_24);
-Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems);
-std::vector<Constant*> const_packed_32_elems;
-const_packed_32_elems.push_back(const_int32_19);
-const_packed_32_elems.push_back(const_int32_23);
-const_packed_32_elems.push_back(const_int32_25);
-ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10));
-const_packed_32_elems.push_back(const_int32_33);
-Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems);
-std::vector<Constant*> const_ptr_34_indices;
-const_ptr_34_indices.push_back(const_int32_19);
-const_ptr_34_indices.push_back(const_int32_19);
-Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() );
-UndefValue* const_packed_35 = UndefValue::get(VectorTy_4);
-std::vector<Constant*> const_ptr_36_indices;
-const_ptr_36_indices.push_back(const_int32_19);
-const_ptr_36_indices.push_back(const_int32_19);
-Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() );
-
-// Global Variable Definitions
-gvar_array__str->setInitializer(const_array_14);
-gvar_array__str1->setInitializer(const_array_15);
-
-// Function Definitions
-
-// Function: approx (func_approx)
-{
- Function::arg_iterator args = func_approx->arg_begin();
- Value* float_a = args++;
- float_a->setName("a");
- Value* float_b = args++;
- float_b->setName("b");
-
- BasicBlock* label_entry = new BasicBlock("entry",func_approx,0);
-
- // Block entry (label_entry)
- FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry);
- SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry);
- FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry);
- SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry);
- FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry);
- SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry);
- std::vector<Value*> float_call_params;
- float_call_params.push_back(float_a_addr_0);
- float_call_params.push_back(float_b_addr_1);
- CallInst* float_call = new CallInst(func_powf, float_call_params.begin(), float_call_params.end(), "call", label_entry);
- float_call->setCallingConv(CallingConv::C);
- float_call->setTailCall(true);const ParamAttrsList *float_call_PAL = 0;
- float_call->setParamAttrs(float_call_PAL);
-
- new ReturnInst(float_call, label_entry);
-
-}
-
-// Function: lit (func_lit)
-{
- Function::arg_iterator args = func_lit->arg_begin();
- Value* packed_tmp = args++;
- packed_tmp->setName("tmp");
-
- BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0);
- BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0);
- BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0);
-
- // Block entry (label_entry_38)
- ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp6", label_entry_38);
- FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp6, const_float_18, "cmp", label_entry_38);
- new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38);
-
- // Block ifthen (label_ifthen)
- InsertElementInst* packed_tmp10 = new InsertElementInst(const_packed_20, float_tmp6, const_int32_23, "tmp10", label_ifthen);
- ExtractElementInst* float_tmp12 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp12", label_ifthen);
- ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp14", label_ifthen);
- std::vector<Value*> float_call_41_params;
- float_call_41_params.push_back(float_tmp12);
- float_call_41_params.push_back(float_tmp14);
- CallInst* float_call_41 = new CallInst(func_approx, float_call_41_params.begin(), float_call_41_params.end(), "call", label_ifthen);
- float_call_41->setCallingConv(CallingConv::C);
- float_call_41->setTailCall(true);const ParamAttrsList *float_call_41_PAL = 0;
- float_call_41->setParamAttrs(float_call_41_PAL);
-
- InsertElementInst* packed_tmp16 = new InsertElementInst(packed_tmp10, float_call_41, const_int32_25, "tmp16", label_ifthen);
- new ReturnInst(packed_tmp16, label_ifthen);
-
- // Block UnifiedReturnBlock (label_UnifiedReturnBlock)
- new ReturnInst(const_packed_26, label_UnifiedReturnBlock);
-
-}
-
-// Function: cmp (func_cmp)
-{
- Function::arg_iterator args = func_cmp->arg_begin();
- Value* packed_tmp0 = args++;
- packed_tmp0->setName("tmp0");
- Value* packed_tmp1 = args++;
- packed_tmp1->setName("tmp1");
- Value* packed_tmp2 = args++;
- packed_tmp2->setName("tmp2");
-
- BasicBlock* label_entry_44 = new BasicBlock("entry",func_cmp,0);
- BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0);
- BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0);
- BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0);
- BasicBlock* label_cond_cont34 = new BasicBlock("cond.cont34",func_cmp,0);
- BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0);
- BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0);
-
- // Block entry (label_entry_44)
- ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_44);
- CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_44);
- FCmpInst* int1_cmp_45 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_44);
- ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_44);
- CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_44);
- FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_44);
- SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_45, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_44);
- new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_44);
-
- // Block cond.?14 (label_cond__14)
- ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14);
- ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14);
- CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14);
- FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14);
- new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14);
-
- // Block cond.cont20 (label_cond_cont20)
- ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20);
- ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20);
- CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20);
- FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20);
- new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20);
-
- // Block cond.?28 (label_cond__28)
- PHINode* packed_tmp23_reg2mem_0 = new PHINode(VectorTy_4, "tmp23.reg2mem.0", label_cond__28);
- packed_tmp23_reg2mem_0->reserveOperandSpace(2);
- packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14);
- packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20);
-
- ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28);
- ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28);
- CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28);
- FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28);
- new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28);
-
- // Block cond.cont34 (label_cond_cont34)
- PHINode* packed_tmp23_reg2mem_1 = new PHINode(VectorTy_4, "tmp23.reg2mem.1", label_cond_cont34);
- packed_tmp23_reg2mem_1->reserveOperandSpace(2);
- packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14);
- packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20);
-
- ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34);
- ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34);
- CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34);
- FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34);
- new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34);
-
- // Block cond.?42 (label_cond__42)
- PHINode* packed_tmp37_reg2mem_0 = new PHINode(VectorTy_4, "tmp37.reg2mem.0", label_cond__42);
- packed_tmp37_reg2mem_0->reserveOperandSpace(2);
- packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28);
- packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34);
-
- ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42);
- new ReturnInst(packed_tmp5113, label_cond__42);
-
- // Block cond.cont48 (label_cond_cont48)
- PHINode* packed_tmp37_reg2mem_1 = new PHINode(VectorTy_4, "tmp37.reg2mem.1", label_cond_cont48);
- packed_tmp37_reg2mem_1->reserveOperandSpace(2);
- packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28);
- packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34);
-
- ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48);
- new ReturnInst(packed_tmp51, label_cond_cont48);
-
-}
-
-// Function: vcos (func_vcos)
-{
- Function::arg_iterator args = func_vcos->arg_begin();
- Value* packed_val = args++;
- packed_val->setName("val");
-
- BasicBlock* label_entry_53 = new BasicBlock("entry",func_vcos,0);
-
- // Block entry (label_entry_53)
- ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_53);
- CastInst* double_conv_54 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_53);
- ExtractElementInst* float_tmp3_55 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_53);
- CastInst* double_conv4 = new FPExtInst(float_tmp3_55, Type::DoubleTy, "conv4", label_entry_53);
- ExtractElementInst* float_tmp6_56 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_53);
- CastInst* double_conv7 = new FPExtInst(float_tmp6_56, Type::DoubleTy, "conv7", label_entry_53);
- ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_53);
- CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_53);
- std::vector<Value*> int32_call_params;
- int32_call_params.push_back(const_ptr_34);
- int32_call_params.push_back(double_conv_54);
- int32_call_params.push_back(double_conv4);
- int32_call_params.push_back(double_conv7);
- int32_call_params.push_back(double_conv10);
- CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_53);
- int32_call->setCallingConv(CallingConv::C);
- int32_call->setTailCall(true);const ParamAttrsList *int32_call_PAL = 0;
- int32_call->setParamAttrs(int32_call_PAL);
-
- CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_53);
- float_call13->setCallingConv(CallingConv::C);
- float_call13->setTailCall(true);const ParamAttrsList *float_call13_PAL = 0;
- float_call13->setParamAttrs(float_call13_PAL);
-
- InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_53);
- CallInst* float_call18 = new CallInst(func_cosf, float_tmp1, "call18", label_entry_53);
- float_call18->setCallingConv(CallingConv::C);
- float_call18->setTailCall(true);const ParamAttrsList *float_call18_PAL = 0;
- float_call18->setParamAttrs(float_call18_PAL);
-
- InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call18, const_int32_23, "tmp20", label_entry_53);
- CallInst* float_call23 = new CallInst(func_cosf, float_tmp1, "call23", label_entry_53);
- float_call23->setCallingConv(CallingConv::C);
- float_call23->setTailCall(true);const ParamAttrsList *float_call23_PAL = 0;
- float_call23->setParamAttrs(float_call23_PAL);
-
- InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call23, const_int32_25, "tmp25", label_entry_53);
- CallInst* float_call28 = new CallInst(func_cosf, float_tmp1, "call28", label_entry_53);
- float_call28->setCallingConv(CallingConv::C);
- float_call28->setTailCall(true);const ParamAttrsList *float_call28_PAL = 0;
- float_call28->setParamAttrs(float_call28_PAL);
-
- InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call28, const_int32_24, "tmp30", label_entry_53);
- CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_53);
- CastInst* double_conv36 = new FPExtInst(float_call18, Type::DoubleTy, "conv36", label_entry_53);
- CastInst* double_conv39 = new FPExtInst(float_call23, Type::DoubleTy, "conv39", label_entry_53);
- CastInst* double_conv42 = new FPExtInst(float_call28, Type::DoubleTy, "conv42", label_entry_53);
- std::vector<Value*> int32_call43_params;
- int32_call43_params.push_back(const_ptr_36);
- int32_call43_params.push_back(double_conv33);
- int32_call43_params.push_back(double_conv36);
- int32_call43_params.push_back(double_conv39);
- int32_call43_params.push_back(double_conv42);
- CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_53);
- int32_call43->setCallingConv(CallingConv::C);
- int32_call43->setTailCall(true);const ParamAttrsList *int32_call43_PAL = 0;
- int32_call43->setParamAttrs(int32_call43_PAL);
-
- new ReturnInst(packed_tmp30, label_entry_53);
-
-}
-
-// Function: scs (func_scs)
-{
- Function::arg_iterator args = func_scs->arg_begin();
- Value* packed_val_58 = args++;
- packed_val_58->setName("val");
-
- BasicBlock* label_entry_59 = new BasicBlock("entry",func_scs,0);
-
- // Block entry (label_entry_59)
- ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_58, const_int32_19, "tmp2", label_entry_59);
- CallInst* float_call_60 = new CallInst(func_cosf, float_tmp2, "call", label_entry_59);
- float_call_60->setCallingConv(CallingConv::C);
- float_call_60->setTailCall(true);const ParamAttrsList *float_call_60_PAL = 0;
- float_call_60->setParamAttrs(float_call_60_PAL);
-
- InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_60, const_int32_19, "tmp5", label_entry_59);
- CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_59);
- float_call7->setCallingConv(CallingConv::C);
- float_call7->setTailCall(true);const ParamAttrsList *float_call7_PAL = 0;
- float_call7->setParamAttrs(float_call7_PAL);
-
- InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_59);
- new ReturnInst(packed_tmp9, label_entry_59);
-
-}
-
-// Function: vsin (func_vsin)
-{
- Function::arg_iterator args = func_vsin->arg_begin();
- Value* packed_val_62 = args++;
- packed_val_62->setName("val");
-
- BasicBlock* label_entry_63 = new BasicBlock("entry",func_vsin,0);
-
- // Block entry (label_entry_63)
- ExtractElementInst* float_tmp2_64 = new ExtractElementInst(packed_val_62, const_int32_19, "tmp2", label_entry_63);
- CallInst* float_call_65 = new CallInst(func_sinf, float_tmp2_64, "call", label_entry_63);
- float_call_65->setCallingConv(CallingConv::C);
- float_call_65->setTailCall(true);const ParamAttrsList *float_call_65_PAL = 0;
- float_call_65->setParamAttrs(float_call_65_PAL);
-
- InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_65, const_int32_19, "tmp6", label_entry_63);
- InsertElementInst* packed_tmp9_66 = new InsertElementInst(packed_tmp6, float_call_65, const_int32_23, "tmp9", label_entry_63);
- InsertElementInst* packed_tmp12 = new InsertElementInst(packed_tmp9_66, float_call_65, const_int32_25, "tmp12", label_entry_63);
- InsertElementInst* packed_tmp15_67 = new InsertElementInst(packed_tmp12, float_call_65, const_int32_24, "tmp15", label_entry_63);
- new ReturnInst(packed_tmp15_67, label_entry_63);
-
-}
-
-// Function: kilp (func_kilp)
-{
- Function::arg_iterator args = func_kilp->arg_begin();
- Value* packed_val_69 = args++;
- packed_val_69->setName("val");
-
- BasicBlock* label_entry_70 = new BasicBlock("entry",func_kilp,0);
- BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0);
- BasicBlock* label_lor_rhs5 = new BasicBlock("lor_rhs5",func_kilp,0);
- BasicBlock* label_lor_rhs11 = new BasicBlock("lor_rhs11",func_kilp,0);
- BasicBlock* label_UnifiedReturnBlock_71 = new BasicBlock("UnifiedReturnBlock",func_kilp,0);
-
- // Block entry (label_entry_70)
- ExtractElementInst* float_tmp1_72 = new ExtractElementInst(packed_val_69, const_int32_19, "tmp1", label_entry_70);
- FCmpInst* int1_cmp_73 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_72, const_float_18, "cmp", label_entry_70);
- new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs, int1_cmp_73, label_entry_70);
-
- // Block lor_rhs (label_lor_rhs)
- ExtractElementInst* float_tmp3_75 = new ExtractElementInst(packed_val_69, const_int32_23, "tmp3", label_lor_rhs);
- FCmpInst* int1_cmp4 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_75, const_float_18, "cmp4", label_lor_rhs);
- new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs5, int1_cmp4, label_lor_rhs);
-
- // Block lor_rhs5 (label_lor_rhs5)
- ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_val_69, const_int32_25, "tmp7", label_lor_rhs5);
- FCmpInst* int1_cmp8 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp7, const_float_18, "cmp8", label_lor_rhs5);
- new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs11, int1_cmp8, label_lor_rhs5);
-
- // Block lor_rhs11 (label_lor_rhs11)
- ExtractElementInst* float_tmp13 = new ExtractElementInst(packed_val_69, const_int32_24, "tmp13", label_lor_rhs11);
- FCmpInst* int1_cmp14 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp13, const_float_18, "cmp14", label_lor_rhs11);
- CastInst* int32_retval = new ZExtInst(int1_cmp14, IntegerType::get(32), "retval", label_lor_rhs11);
- new ReturnInst(int32_retval, label_lor_rhs11);
-
- // Block UnifiedReturnBlock (label_UnifiedReturnBlock_71)
- new ReturnInst(const_int32_23, label_UnifiedReturnBlock_71);
-
-}
-
-return mod;
-
-}
+static const unsigned char llvm_builtins_data[] = {
+0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x29,0x02,0x00,0x00,0x01,0x10,0x00,0x00,
+0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39,
+0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02,
+0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24,
+0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64,
+0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x02,0x00,0x00,0x00,0x0b,0x04,0x00,0x0c,
+0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x32,0x22,0x48,0x09,
+0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45,0xc6,0x05,0x42,0x52,
+0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83,0x29,0x80,0x21,0x00,
+0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47,0x80,0x50,0x2b,0x03,
+0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40,0x14,0x01,0x80,0x11,
+0x80,0x22,0x88,0x00,0x13,0xa2,0x74,0xb0,0x03,0x3c,0xb0,0x83,0x36,0x80,0x87,0x71,
+0x68,0x03,0x76,0x48,0x07,0x77,0xa8,0x07,0x7c,0x68,0x83,0x73,0x70,0x87,0x7a,0xd8,
+0x70,0x0f,0xe5,0xd0,0x06,0xf0,0xa0,0x07,0x73,0x20,0x07,0x7a,0x30,0x07,0x72,0xa0,
+0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x71,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06,
+0xe9,0x80,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e,0x71,0x60,0x07,0x7a,
+0x10,0x07,0x76,0xa0,0x07,0x71,0x60,0x07,0x6d,0x90,0x0e,0x73,0x20,0x07,0x7a,0x30,
+0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07,
+0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0e,0x73,0x20,0x07,0x7a,0x30,0x07,0x72,
+0xa0,0x07,0x73,0x20,0x07,0x6d,0x60,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07,0x72,0xa0,
+0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x76,0x40,0x07,0x7a,0x60,0x07,0x74,0xa0,0x07,
+0x76,0x40,0x07,0x6d,0x60,0x0f,0x71,0x20,0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78,
+0xa0,0x07,0x71,0x20,0x07,0x78,0xd0,0x06,0xe1,0x00,0x07,0x7a,0x00,0x07,0x7a,0x60,
+0x07,0x74,0xd0,0x06,0xe6,0x80,0x07,0x70,0xa0,0x07,0x71,0x20,0x07,0x78,0xa0,0x07,
+0x71,0x20,0x07,0x78,0xa0,0xf3,0x40,0x88,0x04,0x32,0x32,0x02,0x04,0x20,0x76,0x46,
+0xfc,0x6c,0x48,0x92,0x00,0x40,0x00,0x00,0x00,0x00,0x0c,0x49,0x12,0x20,0x00,0x00,
+0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x01,0x00,0x00,0x00,0x30,0x24,0x59,0x00,0x20,
+0x08,0x00,0x00,0x00,0x86,0x24,0x0a,0x00,0x04,0x00,0x00,0x00,0xc0,0x90,0x84,0x01,
+0x02,0x00,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12,
+0x05,0x00,0x02,0x00,0x00,0x00,0x60,0x48,0x72,0x00,0x01,0x00,0x00,0x00,0x00,0x0c,
+0x49,0x14,0x00,0x08,0x00,0x00,0x00,0x80,0x21,0x49,0x01,0x00,0x41,0x00,0x00,0x00,
+0x90,0x05,0x02,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90,
+0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02,
+0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66,
+0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95,
+0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00,
+0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00,
+0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60,
+0x04,0x00,0x00,0x00,0x93,0x0c,0xce,0x43,0x4c,0x31,0x3c,0x8e,0x34,0xc9,0x30,0x41,
+0xc2,0x14,0x03,0x34,0x51,0x93,0x0c,0x4d,0x44,0x4c,0x31,0x44,0x8d,0x35,0x56,0x01,
+0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0x46,0x41,0x08,0xcc,
+0x73,0x9b,0x05,0x21,0x30,0xcf,0x6e,0x18,0x84,0x00,0x2c,0x8b,0x35,0x04,0x80,0x39,
+0x04,0x81,0x5d,0x20,0x80,0x0f,0x0c,0x43,0xe4,0xd3,0x36,0x81,0x04,0x3e,0x30,0x0c,
+0x91,0x4f,0x5b,0x05,0x12,0xf8,0xc0,0x30,0x44,0x7e,0x7d,0x00,0x05,0xd1,0x4c,0x11,
+0x66,0x12,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,
+0x2a,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00,
+0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84,
+0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00,
+0x63,0x08,0x0d,0x34,0xc9,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15,
+0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19,
+0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x13,0x00,0x00,0x00,0x17,0x60,0x20,0xc5,
+0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d,0x14,0x13,0xf3,0xd4,0xb8,0x69,
+0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4,0xba,0x35,0x0c,0x13,0xf3,0x9c,
+0x80,0xe4,0x36,0x48,0x81,0x10,0xc3,0x4a,0x4c,0x54,0xd4,0x6c,0x8b,0x23,0x28,0x76,
+0x41,0x4c,0xcc,0xa3,0x1b,0x07,0x21,0x00,0xcb,0x72,0x00,0x05,0xd1,0x4c,0x11,0x66,
+0x18,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,
+0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00,0x04,0x00,0x00,0x00,
+0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91,0x11,0x00,0x00,0x00,
+0x63,0x08,0x4d,0x64,0x16,0xc1,0x49,0x86,0xab,0x22,0x66,0x19,0x02,0x01,0x1b,0x43,
+0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81,0x0a,0x20,0x0b,0x34,
+0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x24,0x83,0x57,0x11,0xb3,
+0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0,0x48,0xb3,0x04,0xc6,
+0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48,0x63,0x08,0xcd,0x64,
+0x64,0x40,0x70,0x92,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc,0x60,0x0c,0xc1,0x99,
+0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80,0x33,0x38,0xd0,0x00,
+0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80,0xe0,0x24,0x03,0x1b,
+0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12,0x30,0x63,0x08,0x0f,
+0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7,0xa0,0x06,0x70,0x00,
+0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x76,0x52,0x4c,0xcc,
+0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46,0xc6,0x50,0x8a,0x89,
+0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89,0x79,0x68,0x73,0x20,
+0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62,0x9e,0xdb,0x32,0x88,
+0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98,0xa7,0xb7,0x95,0x62,
+0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a,0x34,0x35,0x56,0x62,
+0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25,0x2c,0x82,0xd3,0x0c,
+0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7,0x24,0x01,0x63,0xec,
+0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9,0xfc,0xc4,0xd0,0x90,
+0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6,0xc1,0x71,0x7b,0x29,
+0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01,0x32,0xf6,0xe6,0x46,
+0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08,0x4e,0x33,0x58,0x47,
+0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e,0x33,0xe1,0xbc,0xa5,
+0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e,0x3a,0x40,0xc6,0xde,
+0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee,0xed,0x82,0x10,0x9c,
+0xa6,0xba,0x81,0x44,0x70,0x9a,0xc1,0x17,0x9c,0x66,0x32,0x93,0x42,0x60,0x1e,0x7b,
+0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a,0xa7,0x6d,0xa4,0x98,
+0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10,0x9c,0x66,0xc0,0x7b,
+0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06,0x8b,0xe0,0x34,0x83,
+0x2f,0x38,0xcd,0x64,0xd3,0xe6,0x61,0x08,0x4e,0x53,0xd5,0xf6,0x01,0x14,0x44,0x33,
+0x45,0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x4a,0x00,0x00,0x00,
+0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x24,0xca,0x60,0x04,
+0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10,0x1e,0xe1,0x19,0xc6,
+0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63,0x15,0xc1,0x31,0x84,
+0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1,0x71,0x6c,0x23,0x38,
+0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48,0x35,0xc7,0x20,0x79,
+0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58,0x04,0x10,0x20,0xd5,
+0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7,0x30,0x06,0x64,0xe0,
+0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82,0x63,0x21,0x40,0x70,
+0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00,0x25,0x00,0x00,0x00,
+0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb,0x05,0x31,0x31,0xcf,
+0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6,0x41,0x08,0xc0,0xb2,
+0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5,0x9b,0x8a,0x21,0x00,
+0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18,0x82,0xd3,0x54,0xb7,
+0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f,0x9d,0x9b,0x87,0x21,
+0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59,0x10,0x82,0xd3,0x54,
+0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8,0xb4,0x8d,0x14,0x13,
+0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86,0x00,0x2c,0x8b,0xcd,
+0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72,0x00,0x05,0xd1,0x4c,
+0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x19,0x00,0x00,0x00,
+0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0x4a,0x60,0x04,
+0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0x48,
+0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3,0x44,0x39,0x58,0x85,
+0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x16,0x41,0x4c,0xcc,0x63,0xdb,0x04,0x31,
+0x31,0x4f,0x6e,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x56,0x41,0x4c,
+0xcc,0xd3,0x1b,0x45,0x21,0x00,0xcb,0xb2,0x9b,0x04,0x21,0x00,0xcb,0x02,0x00,0x00,
+0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,
+0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,
+0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca,0x34,0xc7,0x20,0x51,
+0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c,0x94,0x83,0x58,0x38,
+0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x76,0x51,
+0x4c,0xcc,0x53,0xdb,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31,0x31,0x8f,0x6e,0x0d,
+0x43,0x05,0x2c,0x66,0x41,0x4c,0xcc,0xd3,0x1f,0x40,0x41,0x34,0x53,0x84,0x19,0x05,
+0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x2f,0x00,0x00,0x00,
+0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0xa0,0x04,
+0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34,0xc9,0x30,0x49,0xc4,
+0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xc9,0x50,0x49,0xc4,0x2c,0x03,0x21,0x58,
+0x63,0x08,0x4d,0x34,0xc9,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60,0x63,0x08,0x8d,0x33,
+0xc9,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00,0x1a,0x00,0x00,0x00,
+0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb,0x04,0x31,0x31,0x4f,
+0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x47,0x20,0xb9,0x0d,0x52,
+0x20,0xc4,0xb0,0x12,0x13,0x15,0x35,0xdb,0xe2,0x08,0x8a,0x5d,0x10,0x13,0xf3,0xec,
+0x37,0x90,0x2c,0x4e,0xf4,0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,
+0x74,0x02,0xc8,0xe2,0x44,0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,
+0x18,0x11,0x31,0x55,0xc0,0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,
+0x46,0x31,0x08,0xcc,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,
+0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82,0x23,0x59,0xc2,0x20,0x09,0x92,0x1d,0x18,
+0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3,0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,
+0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c,0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,
+0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84,0x84,0x34,0x85,0x31,0x10,0x0a,0xb2,0x3c,
+0x56,0x30,0x08,0xcc,0x63,0x0b,0x44,0x25,0x21,0x0d,0x00,0x00,0x00,0x00,0x00,0x00};
diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
index 8f9830d0b1..857c190f7b 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
@@ -41,6 +41,7 @@
#include "pipe/p_context.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_util.h"
#include "tgsi/exec/tgsi_exec.h"
#include "tgsi/util/tgsi_dump.h"
@@ -179,22 +180,63 @@ typedef void (*vertex_shader_runner)(void *ainputs,
float (*aconsts)[4],
void *temps);
-
+#define MAX_TGSI_VERTICES 4
/*!
This function is used to execute the gallivm_prog in software. Before calling
this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile
function.
*/
int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
- struct tgsi_exec_vector *inputs,
- struct tgsi_exec_vector *dests,
- float (*consts)[4],
- struct tgsi_exec_vector *temps)
+ struct tgsi_exec_machine *machine,
+ const float (*input)[4],
+ unsigned num_inputs,
+ float (*output)[4],
+ unsigned num_outputs,
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
{
+ unsigned int i, j;
+ unsigned slot;
vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
+
assert(runner);
- /*FIXME*/
- runner(inputs, dests, consts, temps);
+
+ for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
+ unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+
+ /* Swizzle inputs.
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < num_inputs; slot++) {
+ machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
+ machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
+ machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
+ machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
+ }
+
+ input = (const float (*)[4])((const char *)input + input_stride);
+ }
+
+ /* run shader */
+ runner(machine->Inputs,
+ machine->Outputs,
+ (float (*)[4]) constants,
+ machine->Temps);
+
+ /* Unswizzle all output results
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < num_outputs; slot++) {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ }
+ output = (float (*)[4])((char *)output + output_stride);
+ }
+ }
return 0;
}
diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp
index 8919491792..1a98491b82 100644
--- a/src/gallium/auxiliary/gallivm/instructions.cpp
+++ b/src/gallium/auxiliary/gallivm/instructions.cpp
@@ -35,6 +35,8 @@
#include "storage.h"
+#include "pipe/p_util.h"
+
#include <llvm/CallingConv.h>
#include <llvm/Constants.h>
#include <llvm/DerivedTypes.h>
@@ -42,7 +44,8 @@
#include <llvm/InstrTypes.h>
#include <llvm/Instructions.h>
#include <llvm/ParameterAttributes.h>
-#include <llvm/ParamAttrsList.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Bitcode/ReaderWriter.h>
#include <sstream>
#include <fstream>
@@ -53,7 +56,6 @@ using namespace llvm;
#include "gallivm_builtins.cpp"
#if 0
-
llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals)
{
VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
@@ -84,7 +86,10 @@ Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicB
m_llvmLit = 0;
m_fmtPtr = 0;
- createGallivmBuiltins(m_mod);
+ MemoryBuffer *buffer = MemoryBuffer::getMemBuffer(
+ (const char*)&llvm_builtins_data[0],
+ (const char*)&llvm_builtins_data[Elements(llvm_builtins_data)-1]);
+ m_mod = ParseBitcodeFile(buffer);
}
llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2)
@@ -134,12 +139,12 @@ llvm::Value *Instructions::callFSqrt(llvm::Value *val)
// predeclare the intrinsic
std::vector<const Type*> fsqrtArgs;
fsqrtArgs.push_back(Type::FloatTy);
- ParamAttrsList *fsqrtPal = 0;
+ PAListPtr fsqrtPal;
FunctionType* fsqrtType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/fsqrtArgs,
/*isVarArg=*/false);
- m_llvmFSqrt = new Function(
+ m_llvmFSqrt = Function::Create(
/*Type=*/fsqrtType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"llvm.sqrt.f32", m_mod);
@@ -161,10 +166,9 @@ llvm::Value * Instructions::rsq(llvm::Value *in1)
Value *abs = callFAbs(x);
Value *sqrt = callFSqrt(abs);
- Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy,
- APFloat(1.f)),
- sqrt,
- name("rsqrt"));
+ Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)),
+ sqrt,
+ name("rsqrt"));
return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt);
}
@@ -191,12 +195,12 @@ llvm::Value *Instructions::callFAbs(llvm::Value *val)
// predeclare the intrinsic
std::vector<const Type*> fabsArgs;
fabsArgs.push_back(Type::FloatTy);
- ParamAttrsList *fabsPal = 0;
+ PAListPtr fabsPal;
FunctionType* fabsType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/fabsArgs,
/*isVarArg=*/false);
- m_llvmFAbs = new Function(
+ m_llvmFAbs = Function::Create(
/*Type=*/fabsType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"fabs", m_mod);
@@ -234,12 +238,12 @@ llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2)
std::vector<const Type*> powArgs;
powArgs.push_back(Type::FloatTy);
powArgs.push_back(Type::FloatTy);
- ParamAttrsList *powPal = 0;
+ PAListPtr powPal;
FunctionType* powType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/powArgs,
/*isVarArg=*/false);
- m_llvmPow = new Function(
+ m_llvmPow = Function::Create(
/*Type=*/powType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"llvm.pow.f32", m_mod);
@@ -273,9 +277,8 @@ llvm::Value * Instructions::rcp(llvm::Value *in1)
Value *x1 = m_builder.CreateExtractElement(in1,
m_storage->constantInt(0),
name("x1"));
- Value *res = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy,
- APFloat(1.f)),
- x1, name("rcp"));
+ Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)),
+ x1, name("rcp"));
return vectorFromVals(res, res, res, res);
}
@@ -314,13 +317,13 @@ llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2)
m_storage->constantInt(3),
name("w"));
Value *ry = m_builder.CreateMul(y1, y2, name("tyuy"));
- return vectorFromVals(ConstantFP::get(Type::FloatTy, APFloat(1.f)),
+ return vectorFromVals(ConstantFP::get(APFloat(1.f)),
ry, z, w);
}
llvm::Value * Instructions::ex2(llvm::Value *in)
{
- llvm::Value *val = callPow(ConstantFP::get(Type::FloatTy, APFloat(2.f)),
+ llvm::Value *val = callPow(ConstantFP::get(APFloat(2.f)),
m_builder.CreateExtractElement(
in, m_storage->constantInt(0),
name("x1")));
@@ -333,12 +336,12 @@ llvm::Value * Instructions::callFloor(llvm::Value *val)
// predeclare the intrinsic
std::vector<const Type*> floorArgs;
floorArgs.push_back(Type::FloatTy);
- ParamAttrsList *floorPal = 0;
+ PAListPtr floorPal;
FunctionType* floorType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/floorArgs,
/*isVarArg=*/false);
- m_llvmFloor = new Function(
+ m_llvmFloor = Function::Create(
/*Type=*/floorType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"floorf", m_mod);
@@ -376,12 +379,12 @@ llvm::Value * Instructions::callFLog(llvm::Value *val)
// predeclare the intrinsic
std::vector<const Type*> flogArgs;
flogArgs.push_back(Type::FloatTy);
- ParamAttrsList *flogPal = 0;
+ PAListPtr flogPal;
FunctionType* flogType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/flogArgs,
/*isVarArg=*/false);
- m_llvmFlog = new Function(
+ m_llvmFlog = Function::Create(
/*Type=*/flogType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"logf", m_mod);
@@ -504,12 +507,12 @@ void Instructions::printVector(llvm::Value *val)
llvm::Function * Instructions::declarePrintf()
{
std::vector<const Type*> args;
- ParamAttrsList *params = 0;
+ PAListPtr params;
FunctionType* funcTy = FunctionType::get(
/*Result=*/IntegerType::get(32),
/*Params=*/args,
/*isVarArg=*/true);
- Function* func_printf = new Function(
+ Function* func_printf = Function::Create(
/*Type=*/funcTy,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"printf", m_mod);
@@ -521,7 +524,7 @@ llvm::Function * Instructions::declarePrintf()
llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2)
{
- Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
Constant *const0f = Constant::getNullValue(Type::FloatTy);
std::vector<llvm::Value*> vec1 = extractVector(in1);
@@ -542,7 +545,7 @@ llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2)
}
llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2)
{
- Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
Constant *const0f = Constant::getNullValue(Type::FloatTy);
std::vector<llvm::Value*> vec1 = extractVector(in1);
@@ -566,7 +569,7 @@ llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2)
llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2)
{
- Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
Constant *const0f = Constant::getNullValue(Type::FloatTy);
std::vector<llvm::Value*> vec1 = extractVector(in1);
@@ -633,8 +636,8 @@ llvm::Value * Instructions::abs(llvm::Value *in)
void Instructions::ifop(llvm::Value *in)
{
- BasicBlock *ifthen = new BasicBlock(name("ifthen"), m_func,0);
- BasicBlock *ifend = new BasicBlock(name("ifthenend"), m_func,0);
+ BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0);
+ BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0);
//BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0);
//BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0);
@@ -660,7 +663,7 @@ llvm::BasicBlock * Instructions::currentBlock() const
void Instructions::elseop()
{
assert(!m_ifStack.empty());
- BasicBlock *ifend = new BasicBlock(name("ifend"), m_func,0);
+ BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0);
m_builder.CreateBr(ifend);
m_builder.SetInsertPoint(m_ifStack.top());
currentBlock()->setName(name("ifelse"));
@@ -687,8 +690,8 @@ llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2,
void Instructions::beginLoop()
{
- BasicBlock *begin = new BasicBlock(name("loop"), m_func,0);
- BasicBlock *end = new BasicBlock(name("endloop"), m_func,0);
+ BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0);
+ BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0);
m_builder.CreateBr(begin);
Loop loop;
@@ -711,7 +714,7 @@ void Instructions::endLoop()
void Instructions::brk()
{
assert(!m_loopStack.empty());
- BasicBlock *unr = new BasicBlock(name("unreachable"), m_func,0);
+ BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0);
m_builder.CreateBr(m_loopStack.top().end);
m_builder.SetInsertPoint(unr);
}
@@ -760,13 +763,13 @@ llvm::Function * Instructions::declareFunc(int label)
args.push_back(vecPtr);
args.push_back(vecPtr);
args.push_back(vecPtr);
- ParamAttrsList *params = 0;
+ PAListPtr params;
FunctionType *funcType = FunctionType::get(
/*Result=*/Type::VoidTy,
/*Params=*/args,
/*isVarArg=*/false);
std::string name = createFuncName(label);
- Function *func = new Function(
+ Function *func = Function::Create(
/*Type=*/funcType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/name.c_str(), m_mod);
@@ -784,7 +787,7 @@ void Instructions::bgnSub(unsigned label)
ptr_INPUT->setName("INPUT");
m_storage->pushArguments(ptr_INPUT);
- llvm::BasicBlock *entry = new BasicBlock("entry", func, 0);
+ llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0);
m_func = func;
m_builder.SetInsertPoint(entry);
@@ -809,10 +812,10 @@ llvm::Function * Instructions::findFunction(int label)
llvm::Value * Instructions::constVector(float x, float y, float z, float w)
{
std::vector<Constant*> vec(4);
- vec[0] = ConstantFP::get(Type::FloatTy, APFloat(x));
- vec[1] = ConstantFP::get(Type::FloatTy, APFloat(y));
- vec[2] = ConstantFP::get(Type::FloatTy, APFloat(z));
- vec[3] = ConstantFP::get(Type::FloatTy, APFloat(w));
+ vec[0] = ConstantFP::get(APFloat(x));
+ vec[1] = ConstantFP::get(APFloat(y));
+ vec[2] = ConstantFP::get(APFloat(z));
+ vec[3] = ConstantFP::get(APFloat(w));
return ConstantVector::get(m_floatVecType, vec);
}
diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h
index 9ebc17dd8e..19ca84ddc6 100644
--- a/src/gallium/auxiliary/gallivm/instructions.h
+++ b/src/gallium/auxiliary/gallivm/instructions.h
@@ -36,7 +36,7 @@
#include <llvm/BasicBlock.h>
#include <llvm/Module.h>
#include <llvm/Value.h>
-#include <llvm/Support/LLVMBuilder.h>
+#include <llvm/Support/IRBuilder.h>
#include <map>
#include <stack>
@@ -125,7 +125,7 @@ private:
llvm::Module *m_mod;
llvm::Function *m_func;
char m_name[32];
- llvm::LLVMFoldingBuilder m_builder;
+ llvm::IRBuilder m_builder;
int m_idx;
llvm::VectorType *m_floatVecType;
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
index 6f83b56a72..76049ade7c 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
@@ -1,8 +1,35 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
#include "instructionssoa.h"
#include "storagesoa.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_util.h"
#include <llvm/CallingConv.h>
#include <llvm/Constants.h>
@@ -10,7 +37,10 @@
#include <llvm/Function.h>
#include <llvm/Instructions.h>
#include <llvm/Transforms/Utils/Cloning.h>
-#include <llvm/ParamAttrsList.h>
+#include <llvm/ParameterAttributes.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+
#include <iostream>
@@ -143,16 +173,56 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
void InstructionsSoa::createFunctionMap()
{
+ m_functionsMap[TGSI_OPCODE_ABS] = "abs";
m_functionsMap[TGSI_OPCODE_DP3] = "dp3";
m_functionsMap[TGSI_OPCODE_DP4] = "dp4";
+ m_functionsMap[TGSI_OPCODE_MIN] = "min";
+ m_functionsMap[TGSI_OPCODE_MAX] = "max";
m_functionsMap[TGSI_OPCODE_POWER] = "pow";
+ m_functionsMap[TGSI_OPCODE_LIT] = "lit";
+ m_functionsMap[TGSI_OPCODE_RSQ] = "rsq";
}
void InstructionsSoa::createDependencies()
{
- std::vector<std::string> powDeps(1);
- powDeps[0] = "powf";
- m_builtinDependencies["pow"] = powDeps;
+ {
+ std::vector<std::string> powDeps(2);
+ powDeps[0] = "powf";
+ powDeps[1] = "powvec";
+ m_builtinDependencies["pow"] = powDeps;
+ }
+ {
+ std::vector<std::string> absDeps(2);
+ absDeps[0] = "fabsf";
+ absDeps[1] = "absvec";
+ m_builtinDependencies["abs"] = absDeps;
+ }
+ {
+ std::vector<std::string> maxDeps(1);
+ maxDeps[0] = "maxvec";
+ m_builtinDependencies["max"] = maxDeps;
+ }
+ {
+ std::vector<std::string> minDeps(1);
+ minDeps[0] = "minvec";
+ m_builtinDependencies["min"] = minDeps;
+ }
+ {
+ std::vector<std::string> litDeps(4);
+ litDeps[0] = "minvec";
+ litDeps[1] = "maxvec";
+ litDeps[2] = "powf";
+ litDeps[3] = "powvec";
+ m_builtinDependencies["lit"] = litDeps;
+ }
+ {
+ std::vector<std::string> rsqDeps(4);
+ rsqDeps[0] = "sqrtf";
+ rsqDeps[1] = "sqrtvec";
+ rsqDeps[2] = "fabsf";
+ rsqDeps[3] = "absvec";
+ m_builtinDependencies["rsq"] = rsqDeps;
+ }
}
llvm::Function * InstructionsSoa::function(int op)
@@ -162,9 +232,13 @@ llvm::Function * InstructionsSoa::function(int op)
std::string name = m_functionsMap[op];
+ std::cout <<"For op = "<<op<<", func is '"<<name<<"'"<<std::endl;
+
std::vector<std::string> deps = m_builtinDependencies[name];
for (unsigned int i = 0; i < deps.size(); ++i) {
- injectFunction(m_builtins->getFunction(deps[i]));
+ llvm::Function *func = m_builtins->getFunction(deps[i]);
+ std::cout <<"\tinjecting dep = '"<<func->getName()<<"'"<<std::endl;
+ injectFunction(func);
}
llvm::Function *originalFunc = m_builtins->getFunction(name);
@@ -183,10 +257,22 @@ llvm::Module * InstructionsSoa::currentModule() const
void InstructionsSoa::createBuiltins()
{
- m_builtins = createSoaBuiltins();
+ MemoryBuffer *buffer = MemoryBuffer::getMemBuffer(
+ (const char*)&soabuiltins_data[0],
+ (const char*)&soabuiltins_data[Elements(soabuiltins_data)]);
+ m_builtins = ParseBitcodeFile(buffer);
+ std::cout<<"Builtins created at "<<m_builtins<<std::endl;
+ assert(m_builtins);
createDependencies();
}
+
+std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> in1)
+{
+ llvm::Function *func = function(TGSI_OPCODE_ABS);
+ return callBuiltin(func, in1);
+}
+
std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2)
{
@@ -204,32 +290,32 @@ llvm::Value * InstructionsSoa::allocaTemp()
std::vector<Value*> indices;
indices.push_back(m_storage->constantInt(0));
indices.push_back(m_storage->constantInt(0));
- GetElementPtrInst *getElem = new GetElementPtrInst(alloca,
- indices.begin(),
- indices.end(),
- name("allocaPtr"),
- m_builder.GetInsertBlock());
+ GetElementPtrInst *getElem = GetElementPtrInst::Create(alloca,
+ indices.begin(),
+ indices.end(),
+ name("allocaPtr"),
+ m_builder.GetInsertBlock());
return getElem;
}
std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr)
{
- GetElementPtrInst *xElemPtr = new GetElementPtrInst(allocaPtr,
- m_storage->constantInt(0),
- name("xPtr"),
- m_builder.GetInsertBlock());
- GetElementPtrInst *yElemPtr = new GetElementPtrInst(allocaPtr,
- m_storage->constantInt(1),
- name("yPtr"),
- m_builder.GetInsertBlock());
- GetElementPtrInst *zElemPtr = new GetElementPtrInst(allocaPtr,
- m_storage->constantInt(2),
- name("zPtr"),
- m_builder.GetInsertBlock());
- GetElementPtrInst *wElemPtr = new GetElementPtrInst(allocaPtr,
- m_storage->constantInt(3),
- name("wPtr"),
- m_builder.GetInsertBlock());
+ GetElementPtrInst *xElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(0),
+ name("xPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *yElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(1),
+ name("yPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *zElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(2),
+ name("zPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *wElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(3),
+ name("wPtr"),
+ m_builder.GetInsertBlock());
std::vector<llvm::Value*> res(4);
res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock());
@@ -320,6 +406,21 @@ std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> i
return callBuiltin(func, in1, in2);
}
+std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_MIN);
+ return callBuiltin(func, in1, in2);
+}
+
+
+std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_MAX);
+ return callBuiltin(func, in1, in2);
+}
+
void checkFunction(Function *func)
{
for (Function::const_iterator BI = func->begin(), BE = func->end();
@@ -354,29 +455,56 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op)
}
llvm::Function *func = 0;
if (originalFunc->isDeclaration()) {
- std::cout << "function decleration" <<std::endl;
- func = new Function(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage,
- originalFunc->getName(), currentModule());
+ func = Function::Create(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage,
+ originalFunc->getName(), currentModule());
func->setCallingConv(CallingConv::C);
- const ParamAttrsList *pal = 0;
+ const PAListPtr pal;
func->setParamAttrs(pal);
currentModule()->dump();
} else {
DenseMap<const Value*, Value *> val;
+ val[m_builtins->getFunction("fabsf")] = currentModule()->getFunction("fabsf");
val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf");
+ val[m_builtins->getFunction("sqrtf")] = currentModule()->getFunction("sqrtf");
+ func = CloneFunction(originalFunc, val);
+#if 0
std::cout <<" replacing "<<m_builtins->getFunction("powf")
<<", with " <<currentModule()->getFunction("powf")<<std::endl;
- func = CloneFunction(originalFunc, val);
std::cout<<"1111-------------------------------"<<std::endl;
checkFunction(originalFunc);
std::cout<<"2222-------------------------------"<<std::endl;
checkFunction(func);
std::cout <<"XXXX = " <<val[m_builtins->getFunction("powf")]<<std::endl;
+#endif
currentModule()->getFunctionList().push_back(func);
- std::cout << "Func parent is "<<func->getParent()
- <<", cur is "<<currentModule() <<std::endl;
}
if (op != TGSI_OPCODE_LAST) {
m_functions[op] = func;
}
}
+
+std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx"));
+ res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby"));
+ res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz"));
+ res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw"));
+
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in)
+{
+ llvm::Function *func = function(TGSI_OPCODE_LIT);
+ return callBuiltin(func, in);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in)
+{
+ llvm::Function *func = function(TGSI_OPCODE_RSQ);
+ return callBuiltin(func, in);
+}
+
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h
index b9104ea286..3e20b652dd 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.h
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.h
@@ -29,7 +29,7 @@
#define INSTRUCTIONSSOA_H
#include <pipe/p_shader_tokens.h>
-#include <llvm/Support/LLVMBuilder.h>
+#include <llvm/Support/IRBuilder.h>
#include <map>
#include <vector>
@@ -48,6 +48,7 @@ public:
InstructionsSoa(llvm::Module *mod, llvm::Function *func,
llvm::BasicBlock *block, StorageSoa *storage);
+ std::vector<llvm::Value*> abs(const std::vector<llvm::Value*> in1);
std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in);
std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2);
@@ -55,13 +56,21 @@ public:
const std::vector<llvm::Value*> in2);
std::vector<llvm::Value*> dp4(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> lit(const std::vector<llvm::Value*> in);
std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2,
const std::vector<llvm::Value*> in3);
+ std::vector<llvm::Value*> max(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> min(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2);
std::vector<llvm::Value*> pow(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> rsq(const std::vector<llvm::Value*> in1);
+ std::vector<llvm::Value*> sub(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
void end();
std::vector<llvm::Value*> extractVector(llvm::Value *vector);
@@ -87,7 +96,7 @@ private:
const std::vector<llvm::Value*> in3);
void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST);
private:
- llvm::LLVMFoldingBuilder m_builder;
+ llvm::IRBuilder m_builder;
StorageSoa *m_storage;
std::map<int, std::string> m_functionsMap;
diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c
index 4f98d754ba..6b9d626ed4 100644
--- a/src/gallium/auxiliary/gallivm/llvm_builtins.c
+++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c
@@ -1,4 +1,3 @@
-/*clang --emit-llvm llvm_builtins.c |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=gallivm_builtins.cpp -f -for=shader -funcname=createGallivmBuiltins*/
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
@@ -30,7 +29,7 @@
* Authors:
* Zack Rusin zack@tungstengraphics.com
*/
-typedef __attribute__(( ocu_vector_type(4) )) float float4;
+typedef __attribute__(( ext_vector_type(4) )) float float4;
extern float powf(float a, float b);
diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c
index 4d658be520..78f84510e2 100644
--- a/src/gallium/auxiliary/gallivm/soabuiltins.c
+++ b/src/gallium/auxiliary/gallivm/soabuiltins.c
@@ -31,7 +31,30 @@
* Authors:
* Zack Rusin zack@tungstengraphics.com
*/
-typedef __attribute__(( ocu_vector_type(4) )) float float4;
+typedef __attribute__(( ext_vector_type(4) )) float float4;
+
+
+extern float fabsf(float val);
+
+float4 absvec(float4 vec)
+{
+ float4 res;
+ res.x = fabsf(vec.x);
+ res.y = fabsf(vec.y);
+ res.z = fabsf(vec.z);
+ res.w = fabsf(vec.w);
+
+ return res;
+}
+
+void abs(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
+{
+ res[0] = absvec(tmp0x);
+ res[1] = absvec(tmp0y);
+ res[2] = absvec(tmp0z);
+ res[3] = absvec(tmp0w);
+}
void dp3(float4 *res,
float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
@@ -61,30 +84,104 @@ void dp4(float4 *res,
}
extern float powf(float num, float p);
+extern float sqrtf(float x);
+
+float4 powvec(float4 vec, float4 q)
+{
+ float4 p;
+ p.x = powf(vec.x, q.x);
+ p.y = powf(vec.y, q.y);
+ p.z = powf(vec.z, q.z);
+ p.w = powf(vec.w, q.w);
+ return p;
+}
void pow(float4 *res,
float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
{
- float4 p;
- p.x = powf(tmp0x.x, tmp1x.x);
- p.y = powf(tmp0x.y, tmp1x.y);
- p.z = powf(tmp0x.z, tmp1x.z);
- p.w = powf(tmp0x.w, tmp1x.w);
-
- res[0] = p;
- res[1] = p;
- res[2] = p;
- res[3] = p;
+ res[0] = powvec(tmp0x, tmp1x);
+ res[1] = res[0];
+ res[2] = res[0];
+ res[3] = res[0];
+}
+
+float4 minvec(float4 a, float4 b)
+{
+ return (float4){(a.x < b.x) ? a.x : b.x,
+ (a.y < b.y) ? a.y : b.y,
+ (a.z < b.z) ? a.z : b.z,
+ (a.w < b.w) ? a.w : b.w};
+}
+
+void min(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+ float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+ res[0] = minvec(tmp0x, tmp1x);
+ res[1] = minvec(tmp0y, tmp1y);
+ res[2] = minvec(tmp0z, tmp1z);
+ res[3] = minvec(tmp0w, tmp1w);
+}
+
+
+float4 maxvec(float4 a, float4 b)
+{
+ return (float4){(a.x > b.x) ? a.x : b.x,
+ (a.y > b.y) ? a.y : b.y,
+ (a.z > b.z) ? a.z : b.z,
+ (a.w > b.w) ? a.w : b.w};
+}
+
+void max(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+ float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+ res[0] = maxvec(tmp0x, tmp1x);
+ res[1] = maxvec(tmp0y, tmp1y);
+ res[2] = maxvec(tmp0z, tmp1z);
+ res[3] = maxvec(tmp0w, tmp1w);
+}
+
+
+void lit(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
+{
+ const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0};
+ const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f};
+ const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f};
+
+ res[0] = (float4){1.0, 1.0, 1.0, 1.0};
+ if (tmp0x.x > 0) {
+ float4 tmpy = maxvec(tmp0y, zerovec);
+ float4 tmpw = minvec(tmp0w, plus128);
+ tmpw = maxvec(tmpw, min128);
+ res[1] = tmp0x;
+ res[2] = powvec(tmpy, tmpw);
+ } else {
+ res[1] = zerovec;
+ res[2] = zerovec;
+ }
+ res[3] = (float4){1.0, 1.0, 1.0, 1.0};
}
-#if 0
-void yo(float4 *out, float4 *in)
+
+float4 sqrtvec(float4 vec)
{
- float4 res[4];
+ float4 p;
+ p.x = sqrtf(vec.x);
+ p.y = sqrtf(vec.y);
+ p.z = sqrtf(vec.z);
+ p.w = sqrtf(vec.w);
+ return p;
+}
- dp3(res, in[0], in[1], in[2], in[3],
- in[4], in[5], in[6], in[7]);
- out[1] = res[1];
+void rsq(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
+{
+ const float4 onevec = (float4) {1., 1., 1., 1.};
+ res[0] = onevec/sqrtvec(absvec(tmp0x));
+ res[1] = onevec/sqrtvec(absvec(tmp0y));
+ res[2] = onevec/sqrtvec(absvec(tmp0z));
+ res[3] = onevec/sqrtvec(absvec(tmp0w));
}
-#endif
diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp
index c4326de8c5..6f373f6dd5 100644
--- a/src/gallium/auxiliary/gallivm/storage.cpp
+++ b/src/gallium/auxiliary/gallivm/storage.cpp
@@ -69,10 +69,10 @@ llvm::Constant *Storage::shuffleMask(int vec)
{
if (!m_extSwizzleVec) {
std::vector<Constant*> elems;
- elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f)));
- elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f)));
- elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f)));
- elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f)));
+ elems.push_back(ConstantFP::get(APFloat(0.f)));
+ elems.push_back(ConstantFP::get(APFloat(1.f)));
+ elems.push_back(ConstantFP::get(APFloat(0.f)));
+ elems.push_back(ConstantFP::get(APFloat(1.f)));
m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems);
}
@@ -186,26 +186,26 @@ llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ)
if ((mask & TGSI_WRITEMASK_X)) {
llvm::Value *x = new ExtractElementInst(src, unsigned(0),
name("x"), m_block);
- dst = new InsertElementInst(dst, x, unsigned(0),
- name("dstx"), m_block);
+ dst = InsertElementInst::Create(dst, x, unsigned(0),
+ name("dstx"), m_block);
}
if ((mask & TGSI_WRITEMASK_Y)) {
llvm::Value *y = new ExtractElementInst(src, unsigned(1),
name("y"), m_block);
- dst = new InsertElementInst(dst, y, unsigned(1),
- name("dsty"), m_block);
+ dst = InsertElementInst::Create(dst, y, unsigned(1),
+ name("dsty"), m_block);
}
if ((mask & TGSI_WRITEMASK_Z)) {
llvm::Value *z = new ExtractElementInst(src, unsigned(2),
name("z"), m_block);
- dst = new InsertElementInst(dst, z, unsigned(2),
- name("dstz"), m_block);
+ dst = InsertElementInst::Create(dst, z, unsigned(2),
+ name("dstz"), m_block);
}
if ((mask & TGSI_WRITEMASK_W)) {
llvm::Value *w = new ExtractElementInst(src, unsigned(3),
name("w"), m_block);
- dst = new InsertElementInst(dst, w, unsigned(3),
- name("dstw"), m_block);
+ dst = InsertElementInst::Create(dst, w, unsigned(3),
+ name("dstw"), m_block);
}
return dst;
}
@@ -295,10 +295,10 @@ llvm::Value * Storage::immediateElement(int idx)
void Storage::addImmediate(float *val)
{
std::vector<Constant*> vec(4);
- vec[0] = ConstantFP::get(Type::FloatTy, APFloat(val[0]));
- vec[1] = ConstantFP::get(Type::FloatTy, APFloat(val[1]));
- vec[2] = ConstantFP::get(Type::FloatTy, APFloat(val[2]));
- vec[3] = ConstantFP::get(Type::FloatTy, APFloat(val[3]));
+ vec[0] = ConstantFP::get(APFloat(val[0]));
+ vec[1] = ConstantFP::get(APFloat(val[1]));
+ vec[2] = ConstantFP::get(APFloat(val[2]));
+ vec[3] = ConstantFP::get(APFloat(val[3]));
m_immediates.push_back(ConstantVector::get(m_floatVecType, vec));
}
@@ -308,11 +308,11 @@ llvm::Value * Storage::elemPtr(Args arg)
std::vector<Value*> indices;
indices.push_back(constantInt(0));
indices.push_back(constantInt(static_cast<int>(arg)));
- GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT,
- indices.begin(),
- indices.end(),
- name("input_ptr"),
- m_block);
+ GetElementPtrInst *getElem = GetElementPtrInst::Create(m_INPUT,
+ indices.begin(),
+ indices.end(),
+ name("input_ptr"),
+ m_block);
return new LoadInst(getElem, name("input_field"), false, m_block);
}
@@ -322,7 +322,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
GetElementPtrInst *getElem = 0;
if (indIdx) {
- getElem = new GetElementPtrInst(ptr,
+ getElem = GetElementPtrInst::Create(ptr,
BinaryOperator::create(Instruction::Add,
indIdx,
constantInt(idx),
@@ -331,7 +331,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
name("field"),
m_block);
} else {
- getElem = new GetElementPtrInst(ptr,
+ getElem = GetElementPtrInst::Create(ptr,
constantInt(idx),
name("field"),
m_block);
@@ -350,7 +350,7 @@ void Storage::setKilElement(llvm::Value *val)
std::vector<Value*> indices;
indices.push_back(constantInt(0));
indices.push_back(constantInt(static_cast<int>(KilArg)));
- GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT,
+ GetElementPtrInst *elem = GetElementPtrInst::Create(m_INPUT,
indices.begin(),
indices.end(),
name("kil_ptr"),
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp
index bb6fe3d7e1..78d754371f 100644
--- a/src/gallium/auxiliary/gallivm/storagesoa.cpp
+++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp
@@ -207,11 +207,11 @@ llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index,
indices.push_back(index);
indices.push_back(constantInt(channel));
- GetElementPtrInst *getElem = new GetElementPtrInst(ptr,
- indices.begin(),
- indices.end(),
- name("ptr"),
- m_block);
+ GetElementPtrInst *getElem = GetElementPtrInst::Create(ptr,
+ indices.begin(),
+ indices.end(),
+ name("ptr"),
+ m_block);
return getElem;
}
@@ -264,10 +264,10 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v
{
VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
std::vector<Constant*> immValues;
- ConstantFP *constx = ConstantFP::get(Type::FloatTy, APFloat(vec[0]));
- ConstantFP *consty = ConstantFP::get(Type::FloatTy, APFloat(vec[1]));
- ConstantFP *constz = ConstantFP::get(Type::FloatTy, APFloat(vec[2]));
- ConstantFP *constw = ConstantFP::get(Type::FloatTy, APFloat(vec[3]));
+ ConstantFP *constx = ConstantFP::get(APFloat(vec[0]));
+ ConstantFP *consty = ConstantFP::get(APFloat(vec[1]));
+ ConstantFP *constz = ConstantFP::get(APFloat(vec[2]));
+ ConstantFP *constw = ConstantFP::get(APFloat(vec[3]));
immValues.push_back(constx);
immValues.push_back(consty);
immValues.push_back(constz);
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index ab9e7a06fb..9695358ab8 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -692,12 +692,14 @@ translate_instructionir(llvm::Module *module,
}
break;
case TGSI_OPCODE_LIT: {
+ out = instr->lit(inputs[0]);
}
break;
case TGSI_OPCODE_RCP: {
}
break;
case TGSI_OPCODE_RSQ: {
+ out = instr->rsq(inputs[0]);
}
break;
case TGSI_OPCODE_EXP:
@@ -724,9 +726,11 @@ translate_instructionir(llvm::Module *module,
}
break;
case TGSI_OPCODE_MIN: {
+ out = instr->min(inputs[0], inputs[1]);
}
break;
case TGSI_OPCODE_MAX: {
+ out = instr->max(inputs[0], inputs[1]);
}
break;
case TGSI_OPCODE_SLT: {
@@ -740,6 +744,7 @@ translate_instructionir(llvm::Module *module,
}
break;
case TGSI_OPCODE_SUB: {
+ out = instr->sub(inputs[0], inputs[1]);
}
break;
case TGSI_OPCODE_LERP: {
@@ -781,6 +786,7 @@ translate_instructionir(llvm::Module *module,
case TGSI_OPCODE_MULTIPLYMATRIX:
break;
case TGSI_OPCODE_ABS: {
+ out = instr->abs(inputs[0]);
}
break;
case TGSI_OPCODE_RCC:
@@ -1014,7 +1020,7 @@ tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens)
Value *ptr_INPUT = args++;
ptr_INPUT->setName("input");
- BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
+ BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0);
tgsi_parse_init(&parse, tokens);
@@ -1085,7 +1091,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
Value *temps = args++;
temps->setName("temps");
- BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
+ BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0);
tgsi_parse_init(&parse, tokens);
diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile
index a9fa518c67..d654dbcc91 100644
--- a/src/gallium/auxiliary/pipebuffer/Makefile
+++ b/src/gallium/auxiliary/pipebuffer/Makefile
@@ -6,9 +6,11 @@ LIBNAME = pipebuffer
C_SOURCES = \
pb_buffer_fenced.c \
pb_buffer_malloc.c \
+ pb_bufmgr_cache.c \
pb_bufmgr_fenced.c \
pb_bufmgr_mm.c \
pb_bufmgr_pool.c \
+ pb_bufmgr_slab.c \
pb_winsys.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript
index 3d41fd84a6..604a217982 100644
--- a/src/gallium/auxiliary/pipebuffer/SConscript
+++ b/src/gallium/auxiliary/pipebuffer/SConscript
@@ -5,9 +5,11 @@ pipebuffer = env.ConvenienceLibrary(
source = [
'pb_buffer_fenced.c',
'pb_buffer_malloc.c',
+ 'pb_bufmgr_cache.c',
'pb_bufmgr_fenced.c',
'pb_bufmgr_mm.c',
'pb_bufmgr_pool.c',
+ 'pb_bufmgr_slab.c',
'pb_winsys.c',
])
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 4b09c80b2a..49705cb862 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -193,6 +193,17 @@ pb_reference(struct pb_buffer **dst,
/**
+ * Utility function to check whether a requested alignment is consistent with
+ * the provided alignment or not.
+ */
+static INLINE int
+pb_check_alignment(size_t requested, size_t provided)
+{
+ return requested <= provided && (provided % requested) == 0;
+}
+
+
+/**
* Malloc-based buffer to store data that can't be used by the graphics
* hardware.
*/
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 24ba61a0b7..2fa0842971 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -34,7 +34,14 @@
*/
+#include "pipe/p_config.h"
+
+#if defined(PIPE_OS_LINUX)
+#include <unistd.h>
+#endif
+
#include "pipe/p_compiler.h"
+#include "pipe/p_error.h"
#include "pipe/p_debug.h"
#include "pipe/p_winsys.h"
#include "pipe/p_thread.h"
@@ -44,9 +51,6 @@
#include "pb_buffer.h"
#include "pb_buffer_fenced.h"
-#ifndef WIN32
-#include <unistd.h>
-#endif
/**
@@ -54,6 +58,13 @@
*/
#define SUPER(__derived) (&(__derived)->base)
+#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \
+ ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE )
+#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \
+ ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE )
+#define PIPE_BUFFER_USAGE_WRITE \
+ ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE )
+
struct fenced_buffer_list
{
@@ -76,6 +87,15 @@ struct fenced_buffer
struct pb_buffer *buffer;
+ /* FIXME: protect access with mutex */
+
+ /**
+ * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current
+ * buffer usage.
+ */
+ unsigned flags;
+
+ unsigned mapcount;
struct pipe_fence_handle *fence;
struct list_head head;
@@ -97,7 +117,10 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf)
{
struct fenced_buffer_list *fenced_list = fenced_buf->list;
+ assert(fenced_buf->base.base.refcount);
+ assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
assert(fenced_buf->fence);
+
assert(!fenced_buf->head.prev);
assert(!fenced_buf->head.next);
LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed);
@@ -111,8 +134,6 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf)
static INLINE void
_fenced_buffer_destroy(struct fenced_buffer *fenced_buf)
{
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
-
assert(!fenced_buf->base.base.refcount);
assert(!fenced_buf->fence);
pb_reference(&fenced_buf->buffer, NULL);
@@ -128,8 +149,8 @@ _fenced_buffer_remove(struct fenced_buffer *fenced_buf)
assert(fenced_buf->fence);
- assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0);
winsys->fence_reference(winsys, &fenced_buf->fence, NULL);
+ fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
assert(fenced_buf->head.prev);
assert(fenced_buf->head.next);
@@ -174,6 +195,9 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
break;
prev_fence = fenced_buf->fence;
}
+ else {
+ assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0);
+ }
_fenced_buffer_remove(fenced_buf);
@@ -183,6 +207,40 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
}
+/**
+ * Serialize writes, but allow concurrent reads.
+ */
+static INLINE enum pipe_error
+fenced_buffer_serialize(struct fenced_buffer *fenced_buf, unsigned flags)
+{
+ struct fenced_buffer_list *fenced_list = fenced_buf->list;
+ struct pipe_winsys *winsys = fenced_list->winsys;
+
+ /* Allow concurrent reads */
+ if(((fenced_buf->flags | flags) & PIPE_BUFFER_USAGE_WRITE) == 0)
+ return PIPE_OK;
+
+ /* Wait for the CPU to finish */
+ if(fenced_buf->mapcount) {
+ /* FIXME: Use thread conditions variables to signal when mapcount
+ * reaches zero */
+ debug_warning("attemp to write concurrently to buffer");
+ /* XXX: we must not fail here in order to support texture mipmap generation
+ return PIPE_ERROR_RETRY;
+ */
+ }
+
+ /* Wait for the GPU to finish */
+ if(fenced_buf->fence) {
+ if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0)
+ return PIPE_ERROR_RETRY;
+ _fenced_buffer_remove(fenced_buf);
+ }
+
+ return PIPE_OK;
+}
+
+
static void
fenced_buffer_destroy(struct pb_buffer *buf)
{
@@ -199,6 +257,7 @@ fenced_buffer_destroy(struct pb_buffer *buf)
prev = curr->prev;
do {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+ assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0);
_fenced_buffer_remove(fenced_buf);
curr = prev;
prev = curr->prev;
@@ -219,16 +278,30 @@ static void *
fenced_buffer_map(struct pb_buffer *buf,
unsigned flags)
{
- struct fenced_buffer *fenced_buf = fenced_buffer(buf);
- return pb_map(fenced_buf->buffer, flags);
+ struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ void *map;
+ assert((flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE) == 0);
+
+ if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK)
+ return NULL;
+
+ map = pb_map(fenced_buf->buffer, flags);
+ if(map)
+ ++fenced_buf->mapcount;
+ fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ return map;
}
static void
fenced_buffer_unmap(struct pb_buffer *buf)
{
- struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ assert(fenced_buf->mapcount);
pb_unmap(fenced_buf->buffer);
+ --fenced_buf->mapcount;
+ if(!fenced_buf->mapcount)
+ fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE;
}
@@ -281,16 +354,49 @@ void
buffer_fence(struct pb_buffer *buf,
struct pipe_fence_handle *fence)
{
- struct fenced_buffer *fenced_buf = fenced_buffer(buf);
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
- struct pipe_winsys *winsys = fenced_list->winsys;
+ struct fenced_buffer *fenced_buf;
+ struct fenced_buffer_list *fenced_list;
+ struct pipe_winsys *winsys;
+ /* FIXME: receive this as a parameter */
+ unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0;
+
+ /* This is a public function, so be extra cautious with the buffer passed,
+ * as happens frequently to receive null buffers, or pointer to buffers
+ * other than fenced buffers. */
+ assert(buf);
+ if(!buf)
+ return;
+ assert(buf->vtbl == &fenced_buffer_vtbl);
+ if(buf->vtbl != &fenced_buffer_vtbl)
+ return;
+
+ fenced_buf = fenced_buffer(buf);
+ fenced_list = fenced_buf->list;
+ winsys = fenced_list->winsys;
+
+ if(fence == fenced_buf->fence) {
+ /* Handle the same fence case specially, not only because it is a fast
+ * path, but mostly to avoid serializing two writes with the same fence,
+ * as that would bring the hardware down to synchronous operation without
+ * any benefit.
+ */
+ fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+ return;
+ }
+
+ if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) {
+ /* FIXME: propagate error */
+ (void)0;
+ }
_glthread_LOCK_MUTEX(fenced_list->mutex);
if (fenced_buf->fence)
_fenced_buffer_remove(fenced_buf);
- winsys->fence_reference(winsys, &fenced_buf->fence, fence);
- if (fenced_buf->fence)
+ if (fence) {
+ winsys->fence_reference(winsys, &fenced_buf->fence, fence);
+ fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE;
_fenced_buffer_add(fenced_buf);
+ }
_glthread_UNLOCK_MUTEX(fenced_list->mutex);
}
@@ -334,7 +440,7 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
/* Wait on outstanding fences */
while (fenced_list->numDelayed) {
_glthread_UNLOCK_MUTEX(fenced_list->mutex);
-#ifndef WIN32
+#if defined(PIPE_OS_LINUX)
sched_yield();
#endif
_fenced_buffer_list_check_free(fenced_list, 1);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index 0cf8e92e37..8de286e3f9 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -80,7 +80,7 @@ struct pb_manager
/**
- * Static buffer pool manager.
+ * Static buffer pool sub-allocator.
*
* Manages the allocation of equally sized buffers. It does so by allocating
* a single big buffer and divide it equally sized buffers.
@@ -94,7 +94,7 @@ pool_bufmgr_create(struct pb_manager *provider,
/**
- * Wraper around the old memory manager.
+ * Static sub-allocator based the old memory manager.
*
* It managers buffers of different sizes. It does so by allocating a buffer
* with the size of the heap, and then using the old mm memory manager to manage
@@ -114,6 +114,40 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
size_t size, size_t align2);
+/**
+ * Slab sub-allocator.
+ */
+struct pb_manager *
+pb_slab_manager_create(struct pb_manager *provider,
+ size_t bufSize,
+ size_t slabSize,
+ const struct pb_desc *desc);
+
+/**
+ * Allow a range of buffer size, by aggregating multiple slabs sub-allocators
+ * with different bucket sizes.
+ */
+struct pb_manager *
+pb_slab_range_manager_create(struct pb_manager *provider,
+ size_t minBufSize,
+ size_t maxBufSize,
+ size_t slabSize,
+ const struct pb_desc *desc);
+
+
+/**
+ * Time-based buffer cache.
+ *
+ * This manager keeps a cache of destroyed buffers during a time interval.
+ */
+struct pb_manager *
+pb_cache_manager_create(struct pb_manager *provider,
+ unsigned usecs);
+
+void
+pb_cache_flush(struct pb_manager *mgr);
+
+
/**
* Fenced buffer manager.
*
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
new file mode 100644
index 0000000000..4bd3f94a6c
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -0,0 +1,344 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Buffer cache.
+ *
+ * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_thread.h"
+#include "pipe/p_util.h"
+#include "util/u_double_list.h"
+#include "util/u_time.h"
+
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+/**
+ * Convenience macro (type safe).
+ */
+#define SUPER(__derived) (&(__derived)->base)
+
+
+struct pb_cache_manager;
+
+
+/**
+ * Wrapper around a pipe buffer which adds delayed destruction.
+ */
+struct pb_cache_buffer
+{
+ struct pb_buffer base;
+
+ struct pb_buffer *buffer;
+ struct pb_cache_manager *mgr;
+
+ /** Caching time interval */
+ struct util_time start, end;
+
+ struct list_head head;
+};
+
+
+struct pb_cache_manager
+{
+ struct pb_manager base;
+
+ struct pb_manager *provider;
+ unsigned usecs;
+
+ _glthread_Mutex mutex;
+
+ struct list_head delayed;
+ size_t numDelayed;
+};
+
+
+static INLINE struct pb_cache_buffer *
+pb_cache_buffer(struct pb_buffer *buf)
+{
+ assert(buf);
+ return (struct pb_cache_buffer *)buf;
+}
+
+
+static INLINE struct pb_cache_manager *
+pb_cache_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct pb_cache_manager *)mgr;
+}
+
+
+/**
+ * Actually destroy the buffer.
+ */
+static INLINE void
+_pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
+{
+ struct pb_cache_manager *mgr = buf->mgr;
+
+ LIST_DEL(&buf->head);
+ assert(mgr->numDelayed);
+ --mgr->numDelayed;
+ assert(!buf->base.base.refcount);
+ pb_reference(&buf->buffer, NULL);
+ FREE(buf);
+}
+
+
+/**
+ * Free as many cache buffers from the list head as possible.
+ */
+static void
+_pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr)
+{
+ struct list_head *curr, *next;
+ struct pb_cache_buffer *buf;
+ struct util_time now;
+
+ util_time_get(&now);
+
+ curr = mgr->delayed.next;
+ next = curr->next;
+ while(curr != &mgr->delayed) {
+ buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+
+ if(!util_time_timeout(&buf->start, &buf->end, &now))
+ break;
+
+ _pb_cache_buffer_destroy(buf);
+
+ curr = next;
+ next = curr->next;
+ }
+}
+
+
+static void
+pb_cache_buffer_destroy(struct pb_buffer *_buf)
+{
+ struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+ struct pb_cache_manager *mgr = buf->mgr;
+
+ _glthread_LOCK_MUTEX(mgr->mutex);
+ assert(buf->base.base.refcount == 0);
+
+ _pb_cache_buffer_list_check_free(mgr);
+
+ util_time_get(&buf->start);
+ util_time_add(&buf->start, mgr->usecs, &buf->end);
+ LIST_ADDTAIL(&buf->head, &mgr->delayed);
+ ++mgr->numDelayed;
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
+}
+
+
+static void *
+pb_cache_buffer_map(struct pb_buffer *_buf,
+ unsigned flags)
+{
+ struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+ return pb_map(buf->buffer, flags);
+}
+
+
+static void
+pb_cache_buffer_unmap(struct pb_buffer *_buf)
+{
+ struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+ pb_unmap(buf->buffer);
+}
+
+
+static void
+pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+ pb_get_base_buffer(buf->buffer, base_buf, offset);
+}
+
+
+const struct pb_vtbl
+pb_cache_buffer_vtbl = {
+ pb_cache_buffer_destroy,
+ pb_cache_buffer_map,
+ pb_cache_buffer_unmap,
+ pb_cache_buffer_get_base_buffer
+};
+
+
+static INLINE boolean
+pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ /* TODO: be more lenient with size */
+ if(buf->base.base.size != size)
+ return FALSE;
+
+ if(!pb_check_alignment(desc->alignment, buf->base.base.alignment))
+ return FALSE;
+
+ /* XXX: check usage too? */
+
+ return TRUE;
+}
+
+
+static struct pb_buffer *
+pb_cache_manager_create_buffer(struct pb_manager *_mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
+ struct pb_cache_buffer *buf;
+ struct pb_cache_buffer *curr_buf;
+ struct list_head *curr, *next;
+ struct util_time now;
+
+ _glthread_LOCK_MUTEX(mgr->mutex);
+
+ buf = NULL;
+ curr = mgr->delayed.next;
+ next = curr->next;
+
+ /* search in the expired buffers, freeing them in the process */
+ util_time_get(&now);
+ while(curr != &mgr->delayed) {
+ curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+ if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc))
+ buf = curr_buf;
+ else if(util_time_timeout(&curr_buf->start, &curr_buf->end, &now))
+ _pb_cache_buffer_destroy(curr_buf);
+ curr = next;
+ next = curr->next;
+ }
+
+ /* keep searching in the hot buffers */
+ while(!buf && curr != &mgr->delayed) {
+ curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+ if(pb_cache_is_buffer_compat(curr_buf, size, desc))
+ buf = curr_buf;
+ curr = next;
+ next = curr->next;
+ }
+
+ if(buf) {
+ LIST_DEL(&buf->head);
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
+ ++buf->base.base.refcount;
+ return &buf->base;
+ }
+
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
+
+ buf = CALLOC_STRUCT(pb_cache_buffer);
+ if(!buf)
+ return NULL;
+
+ buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc);
+ if(!buf->buffer) {
+ FREE(buf);
+ return NULL;
+ }
+
+ assert(buf->buffer->base.refcount >= 1);
+ assert(pb_check_alignment(desc->alignment, buf->buffer->base.alignment));
+ assert((buf->buffer->base.usage & desc->usage) == desc->usage);
+ assert(buf->buffer->base.size >= size);
+
+ buf->base.base.refcount = 1;
+ buf->base.base.alignment = buf->buffer->base.alignment;
+ buf->base.base.usage = buf->buffer->base.usage;
+ buf->base.base.size = buf->buffer->base.size;
+
+ buf->base.vtbl = &pb_cache_buffer_vtbl;
+ buf->mgr = mgr;
+
+ return &buf->base;
+}
+
+
+void
+pb_cache_flush(struct pb_manager *_mgr)
+{
+ struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
+ struct list_head *curr, *next;
+ struct pb_cache_buffer *buf;
+
+ _glthread_LOCK_MUTEX(mgr->mutex);
+ curr = mgr->delayed.next;
+ next = curr->next;
+ while(curr != &mgr->delayed) {
+ buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+ _pb_cache_buffer_destroy(buf);
+ curr = next;
+ next = curr->next;
+ }
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
+}
+
+
+static void
+pb_cache_manager_destroy(struct pb_manager *mgr)
+{
+ pb_cache_flush(mgr);
+ FREE(mgr);
+}
+
+
+struct pb_manager *
+pb_cache_manager_create(struct pb_manager *provider,
+ unsigned usecs)
+{
+ struct pb_cache_manager *mgr;
+
+ mgr = (struct pb_cache_manager *)CALLOC(1, sizeof(*mgr));
+ if (!mgr)
+ return NULL;
+
+ mgr->base.destroy = pb_cache_manager_destroy;
+ mgr->base.create_buffer = pb_cache_manager_create_buffer;
+ mgr->provider = provider;
+ mgr->usecs = usecs;
+ LIST_INITHEAD(&mgr->delayed);
+ mgr->numDelayed = 0;
+ _glthread_INIT_MUTEX(mgr->mutex);
+
+ return &mgr->base;
+}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
index bffca5b244..9d809e2f9b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
@@ -30,7 +30,7 @@
* \file
* A buffer manager that wraps buffers in fenced buffers.
*
- * \author José Fonseca <jrfonseca@tungstengraphics.dot.com>
+ * \author José Fonseca <jrfonseca@tungstengraphics.dot.com>
*/
@@ -101,7 +101,8 @@ fenced_bufmgr_destroy(struct pb_manager *mgr)
fenced_buffer_list_destroy(fenced_mgr->fenced_list);
- fenced_mgr->provider->destroy(fenced_mgr->provider);
+ if(fenced_mgr->provider)
+ fenced_mgr->provider->destroy(fenced_mgr->provider);
FREE(fenced_mgr);
}
@@ -113,6 +114,9 @@ fenced_bufmgr_create(struct pb_manager *provider,
{
struct fenced_pb_manager *fenced_mgr;
+ if(!provider)
+ return NULL;
+
fenced_mgr = (struct fenced_pb_manager *)CALLOC(1, sizeof(*fenced_mgr));
if (!fenced_mgr)
return NULL;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
new file mode 100644
index 0000000000..b931455056
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -0,0 +1,495 @@
+/**************************************************************************
+ *
+ * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, FREE of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * S-lab pool implementation.
+ *
+ * @author Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_error.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_thread.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+#include "util/u_double_list.h"
+#include "util/u_time.h"
+
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+#define DRI_SLABPOOL_ALLOC_RETRIES 100
+
+
+struct pb_slab;
+
+struct pb_slab_buffer
+{
+ struct pb_buffer base;
+
+ struct pb_slab *slab;
+ struct list_head head;
+ unsigned mapCount;
+ size_t start;
+ _glthread_Cond event;
+};
+
+struct pb_slab
+{
+ struct list_head head;
+ struct list_head freeBuffers;
+ size_t numBuffers;
+ size_t numFree;
+ struct pb_slab_buffer *buffers;
+ struct pb_slab_manager *mgr;
+
+ struct pb_buffer *bo;
+ void *virtual;
+};
+
+struct pb_slab_manager
+{
+ struct pb_manager base;
+
+ struct pb_manager *provider;
+ size_t bufSize;
+ size_t slabSize;
+ struct pb_desc desc;
+
+ struct list_head slabs;
+ struct list_head freeSlabs;
+
+ _glthread_Mutex mutex;
+};
+
+/**
+ * The data of this structure remains constant after
+ * initialization and thus needs no mutex protection.
+ */
+struct pb_slab_range_manager
+{
+ struct pb_manager base;
+
+ struct pb_manager *provider;
+ size_t minBufSize;
+ size_t maxBufSize;
+ struct pb_desc desc;
+
+ unsigned numBuckets;
+ size_t *bucketSizes;
+ struct pb_manager **buckets;
+};
+
+
+static INLINE struct pb_slab_buffer *
+pb_slab_buffer(struct pb_buffer *buf)
+{
+ assert(buf);
+ return (struct pb_slab_buffer *)buf;
+}
+
+
+static INLINE struct pb_slab_manager *
+pb_slab_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct pb_slab_manager *)mgr;
+}
+
+
+static INLINE struct pb_slab_range_manager *
+pb_slab_range_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct pb_slab_range_manager *)mgr;
+}
+
+
+/**
+ * Delete a buffer from the slab delayed list and put
+ * it on the slab FREE list.
+ */
+static void
+pb_slab_buffer_destroy(struct pb_buffer *_buf)
+{
+ struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+ struct pb_slab *slab = buf->slab;
+ struct pb_slab_manager *mgr = slab->mgr;
+ struct list_head *list = &buf->head;
+
+ _glthread_LOCK_MUTEX(mgr->mutex);
+
+ assert(buf->base.base.refcount == 0);
+
+ buf->mapCount = 0;
+
+ LIST_DEL(list);
+ LIST_ADDTAIL(list, &slab->freeBuffers);
+ slab->numFree++;
+
+ if (slab->head.next == &slab->head)
+ LIST_ADDTAIL(&slab->head, &mgr->slabs);
+
+ if (slab->numFree == slab->numBuffers) {
+ list = &slab->head;
+ LIST_DEL(list);
+ LIST_ADDTAIL(list, &mgr->freeSlabs);
+ }
+
+ if (mgr->slabs.next == &mgr->slabs || slab->numFree
+ != slab->numBuffers) {
+
+ struct list_head *next;
+
+ for (list = mgr->freeSlabs.next, next = list->next; list
+ != &mgr->freeSlabs; list = next, next = list->next) {
+
+ slab = LIST_ENTRY(struct pb_slab, list, head);
+
+ LIST_DELINIT(list);
+ pb_reference(&slab->bo, NULL);
+ FREE(slab->buffers);
+ FREE(slab);
+ }
+ }
+
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
+}
+
+
+static void *
+pb_slab_buffer_map(struct pb_buffer *_buf,
+ unsigned flags)
+{
+ struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+
+ ++buf->mapCount;
+ return (void *) ((uint8_t *) buf->slab->virtual + buf->start);
+}
+
+
+static void
+pb_slab_buffer_unmap(struct pb_buffer *_buf)
+{
+ struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+
+ --buf->mapCount;
+ if (buf->mapCount == 0)
+ _glthread_COND_BROADCAST(buf->event);
+}
+
+
+static void
+pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+ pb_get_base_buffer(buf->slab->bo, base_buf, offset);
+ *offset += buf->start;
+}
+
+
+static const struct pb_vtbl
+pb_slab_buffer_vtbl = {
+ pb_slab_buffer_destroy,
+ pb_slab_buffer_map,
+ pb_slab_buffer_unmap,
+ pb_slab_buffer_get_base_buffer
+};
+
+
+static enum pipe_error
+pb_slab_create(struct pb_slab_manager *mgr)
+{
+ struct pb_slab *slab;
+ struct pb_slab_buffer *buf;
+ unsigned numBuffers;
+ unsigned i;
+ enum pipe_error ret;
+
+ slab = CALLOC_STRUCT(pb_slab);
+ if (!slab)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ /*
+ * FIXME: We should perhaps allow some variation in slabsize in order
+ * to efficiently reuse slabs.
+ */
+
+ slab->bo = mgr->provider->create_buffer(mgr->provider, mgr->slabSize, &mgr->desc);
+ if(!slab->bo) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto out_err0;
+ }
+
+ slab->virtual = pb_map(slab->bo,
+ PIPE_BUFFER_USAGE_CPU_READ |
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ if(!slab->virtual) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto out_err1;
+ }
+
+ pb_unmap(slab->bo);
+
+ numBuffers = slab->bo->base.size / mgr->bufSize;
+
+ slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers));
+ if (!slab->buffers) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto out_err1;
+ }
+
+ LIST_INITHEAD(&slab->head);
+ LIST_INITHEAD(&slab->freeBuffers);
+ slab->numBuffers = numBuffers;
+ slab->numFree = 0;
+ slab->mgr = mgr;
+
+ buf = slab->buffers;
+ for (i=0; i < numBuffers; ++i) {
+ buf->base.base.refcount = 0;
+ buf->base.base.size = mgr->bufSize;
+ buf->base.base.alignment = 0;
+ buf->base.base.usage = 0;
+ buf->base.vtbl = &pb_slab_buffer_vtbl;
+ buf->slab = slab;
+ buf->start = i* mgr->bufSize;
+ buf->mapCount = 0;
+ _glthread_INIT_COND(buf->event);
+ LIST_ADDTAIL(&buf->head, &slab->freeBuffers);
+ slab->numFree++;
+ buf++;
+ }
+
+ LIST_ADDTAIL(&slab->head, &mgr->slabs);
+
+ return PIPE_OK;
+
+out_err1:
+ pb_reference(&slab->bo, NULL);
+out_err0:
+ FREE(slab);
+ return ret;
+}
+
+
+static struct pb_buffer *
+pb_slab_manager_create_buffer(struct pb_manager *_mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
+ static struct pb_slab_buffer *buf;
+ struct pb_slab *slab;
+ struct list_head *list;
+ int count = DRI_SLABPOOL_ALLOC_RETRIES;
+
+ /* check size */
+ assert(size == mgr->bufSize);
+ if(size != mgr->bufSize)
+ return NULL;
+
+ /* check if we can provide the requested alignment */
+ assert(pb_check_alignment(desc->alignment, mgr->desc.alignment));
+ if(!pb_check_alignment(desc->alignment, mgr->desc.alignment))
+ return NULL;
+ assert(pb_check_alignment(desc->alignment, mgr->bufSize));
+ if(!pb_check_alignment(desc->alignment, mgr->bufSize))
+ return NULL;
+
+ /* XXX: check for compatible buffer usage too? */
+
+ _glthread_LOCK_MUTEX(mgr->mutex);
+ while (mgr->slabs.next == &mgr->slabs && count > 0) {
+ if (mgr->slabs.next != &mgr->slabs)
+ break;
+
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
+ if (count != DRI_SLABPOOL_ALLOC_RETRIES)
+ util_time_sleep(1);
+ _glthread_LOCK_MUTEX(mgr->mutex);
+ (void) pb_slab_create(mgr);
+ count--;
+ }
+
+ list = mgr->slabs.next;
+ if (list == &mgr->slabs) {
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
+ return NULL;
+ }
+ slab = LIST_ENTRY(struct pb_slab, list, head);
+ if (--slab->numFree == 0)
+ LIST_DELINIT(list);
+
+ list = slab->freeBuffers.next;
+ LIST_DELINIT(list);
+
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
+ buf = LIST_ENTRY(struct pb_slab_buffer, list, head);
+
+ ++buf->base.base.refcount;
+ buf->base.base.alignment = desc->alignment;
+ buf->base.base.usage = desc->usage;
+
+ return &buf->base;
+}
+
+
+static void
+pb_slab_manager_destroy(struct pb_manager *_mgr)
+{
+ struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
+
+ /* TODO: cleanup all allocated buffers */
+ FREE(mgr);
+}
+
+
+struct pb_manager *
+pb_slab_manager_create(struct pb_manager *provider,
+ size_t bufSize,
+ size_t slabSize,
+ const struct pb_desc *desc)
+{
+ struct pb_slab_manager *mgr;
+
+ mgr = CALLOC_STRUCT(pb_slab_manager);
+ if (!mgr)
+ return NULL;
+
+ mgr->base.destroy = pb_slab_manager_destroy;
+ mgr->base.create_buffer = pb_slab_manager_create_buffer;
+
+ mgr->provider = provider;
+ mgr->bufSize = bufSize;
+ mgr->slabSize = slabSize;
+ mgr->desc = *desc;
+
+ LIST_INITHEAD(&mgr->slabs);
+ LIST_INITHEAD(&mgr->freeSlabs);
+
+ _glthread_INIT_MUTEX(mgr->mutex);
+
+ return &mgr->base;
+}
+
+
+static struct pb_buffer *
+pb_slab_range_manager_create_buffer(struct pb_manager *_mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr);
+ size_t bufSize;
+ unsigned i;
+
+ bufSize = mgr->minBufSize;
+ for (i = 0; i < mgr->numBuckets; ++i) {
+ if(bufSize >= size)
+ return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc);
+ bufSize *= 2;
+ }
+
+ /* Fall back to allocate a buffer object directly from the provider. */
+ return mgr->provider->create_buffer(mgr->provider, size, desc);
+}
+
+
+static void
+pb_slab_range_manager_destroy(struct pb_manager *_mgr)
+{
+ struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr);
+ unsigned i;
+
+ for (i = 0; i < mgr->numBuckets; ++i)
+ mgr->buckets[i]->destroy(mgr->buckets[i]);
+ FREE(mgr->buckets);
+ FREE(mgr->bucketSizes);
+ FREE(mgr);
+}
+
+
+struct pb_manager *
+pb_slab_range_manager_create(struct pb_manager *provider,
+ size_t minBufSize,
+ size_t maxBufSize,
+ size_t slabSize,
+ const struct pb_desc *desc)
+{
+ struct pb_slab_range_manager *mgr;
+ size_t bufSize;
+ unsigned i;
+
+ mgr = CALLOC_STRUCT(pb_slab_range_manager);
+ if (!mgr)
+ goto out_err0;
+
+ mgr->base.destroy = pb_slab_range_manager_destroy;
+ mgr->base.create_buffer = pb_slab_range_manager_create_buffer;
+
+ mgr->provider = provider;
+ mgr->minBufSize = minBufSize;
+ mgr->maxBufSize = maxBufSize;
+
+ mgr->numBuckets = 1;
+ bufSize = minBufSize;
+ while(bufSize < maxBufSize) {
+ bufSize *= 2;
+ ++mgr->numBuckets;
+ }
+
+ mgr->buckets = CALLOC(mgr->numBuckets, sizeof(*mgr->buckets));
+ if (!mgr->buckets)
+ goto out_err1;
+
+ bufSize = minBufSize;
+ for (i = 0; i < mgr->numBuckets; ++i) {
+ mgr->buckets[i] = pb_slab_manager_create(provider, bufSize, slabSize, desc);
+ if(!mgr->buckets[i])
+ goto out_err2;
+ bufSize *= 2;
+ }
+
+ return &mgr->base;
+
+out_err2:
+ for (i = 0; i < mgr->numBuckets; ++i)
+ if(mgr->buckets[i])
+ mgr->buckets[i]->destroy(mgr->buckets[i]);
+ FREE(mgr->buckets);
+out_err1:
+ FREE(mgr);
+out_err0:
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index eb3359750b..f01e12faa0 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -26,14 +26,29 @@
**************************************************************************/
+#include "pipe/p_debug.h"
#include "rtasm_cpu.h"
+static boolean rtasm_sse_enabled(void)
+{
+ static boolean firsttime = 1;
+ static boolean enabled;
+
+ /* This gets called quite often at the moment:
+ */
+ if (firsttime) {
+ enabled = !debug_get_bool_option("GALLIUM_NOSSE", FALSE);
+ firsttime = FALSE;
+ }
+ return enabled;
+}
+
int rtasm_cpu_has_sse(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__)
- return 1;
+#if defined(__i386__) || defined(__386__) || defined(i386)
+ return rtasm_sse_enabled();
#else
return 0;
#endif
@@ -42,8 +57,8 @@ int rtasm_cpu_has_sse(void)
int rtasm_cpu_has_sse2(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__)
- return 1;
+#if defined(__i386__) || defined(__386__) || defined(i386)
+ return rtasm_sse_enabled();
#else
return 0;
#endif
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 4d33950e99..4e036d9032 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -33,15 +33,114 @@
#define DISASSEM 0
#define X86_TWOB 0x0f
-static unsigned char *cptr( void (*label)() )
-{
- return (unsigned char *) label;
-}
+
+#define DUMP_SSE 0
+
+#if DUMP_SSE
+
+static void
+_print_reg(
+ struct x86_reg reg )
+{
+ if (reg.mod != mod_REG)
+ debug_printf( "[" );
+
+ switch( reg.file ) {
+ case file_REG32:
+ switch( reg.idx ) {
+ case reg_AX: debug_printf( "EAX" ); break;
+ case reg_CX: debug_printf( "ECX" ); break;
+ case reg_DX: debug_printf( "EDX" ); break;
+ case reg_BX: debug_printf( "EBX" ); break;
+ case reg_SP: debug_printf( "ESP" ); break;
+ case reg_BP: debug_printf( "EBP" ); break;
+ case reg_SI: debug_printf( "ESI" ); break;
+ case reg_DI: debug_printf( "EDI" ); break;
+ }
+ break;
+ case file_MMX:
+ debug_printf( "MMX%u", reg.idx );
+ break;
+ case file_XMM:
+ debug_printf( "XMM%u", reg.idx );
+ break;
+ case file_x87:
+ debug_printf( "fp%u", reg.idx );
+ break;
+ }
+
+ if (reg.mod == mod_DISP8 ||
+ reg.mod == mod_DISP32)
+ debug_printf("+%d", reg.disp);
+
+ if (reg.mod != mod_REG)
+ debug_printf( "]" );
+}
+
+
+#define DUMP_START() debug_printf( "\n" )
+#define DUMP_END() debug_printf( "\n" )
+
+#define DUMP() do { \
+ const char *foo = __FUNCTION__; \
+ while (*foo && *foo != '_') \
+ foo++; \
+ if (*foo) \
+ foo++; \
+ debug_printf( "\n% 15s ", foo ); \
+} while (0)
+
+#define DUMP_I( I ) do { \
+ DUMP(); \
+ debug_printf( "%u", I ); \
+} while( 0 )
+
+#define DUMP_R( R0 ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+} while( 0 )
+
+#define DUMP_RR( R0, R1 ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+ debug_printf( ", " ); \
+ _print_reg( R1 ); \
+} while( 0 )
+
+#define DUMP_RI( R0, I ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+ debug_printf( ", %u", I ); \
+} while( 0 )
+
+#define DUMP_RRI( R0, R1, I ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+ debug_printf( ", " ); \
+ _print_reg( R1 ); \
+ debug_printf( ", %u", I ); \
+} while( 0 )
+
+#else
+
+#define DUMP_START()
+#define DUMP_END()
+#define DUMP( )
+#define DUMP_I( I )
+#define DUMP_R( R0 )
+#define DUMP_RR( R0, R1 )
+#define DUMP_RI( R0, I )
+#define DUMP_RRI( R0, R1, I )
+
+#endif
static void do_realloc( struct x86_function *p )
{
- if (p->size == 0) {
+ if (p->store == p->error_overflow) {
+ p->csr = p->store;
+ }
+ else if (p->size == 0) {
p->size = 1024;
p->store = rtasm_exec_malloc(p->size);
p->csr = p->store;
@@ -51,10 +150,22 @@ static void do_realloc( struct x86_function *p )
unsigned char *tmp = p->store;
p->size *= 2;
p->store = rtasm_exec_malloc(p->size);
- memcpy(p->store, tmp, used);
- p->csr = p->store + used;
+
+ if (p->store) {
+ memcpy(p->store, tmp, used);
+ p->csr = p->store + used;
+ }
+ else {
+ p->csr = p->store;
+ }
+
rtasm_exec_free(tmp);
}
+
+ if (p->store == NULL) {
+ p->store = p->csr = p->error_overflow;
+ p->size = sizeof(p->error_overflow);
+ }
}
/* Emit bytes to the instruction stream:
@@ -236,9 +347,9 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg )
return x86_make_reg( reg.file, reg.idx );
}
-unsigned char *x86_get_label( struct x86_function *p )
+int x86_get_label( struct x86_function *p )
{
- return p->csr;
+ return p->csr - p->store;
}
@@ -250,16 +361,22 @@ unsigned char *x86_get_label( struct x86_function *p )
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
- unsigned char *label )
+ int label )
{
- intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2);
+ int offset = label - (x86_get_label(p) + 2);
+ DUMP_I(cc);
+ if (offset < 0) {
+ int amt = p->csr - p->store;
+ assert(amt > -offset);
+ }
+
if (offset <= 127 && offset >= -128) {
emit_1ub(p, 0x70 + cc);
emit_1b(p, (char) offset);
}
else {
- offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6);
+ offset = label - (x86_get_label(p) + 6);
emit_2ub(p, 0x0f, 0x80 + cc);
emit_1i(p, offset);
}
@@ -267,23 +384,27 @@ void x86_jcc( struct x86_function *p,
/* Always use a 32bit offset for forward jumps:
*/
-unsigned char *x86_jcc_forward( struct x86_function *p,
- enum x86_cc cc )
+int x86_jcc_forward( struct x86_function *p,
+ enum x86_cc cc )
{
+ DUMP_I(cc);
emit_2ub(p, 0x0f, 0x80 + cc);
emit_1i(p, 0);
return x86_get_label(p);
}
-unsigned char *x86_jmp_forward( struct x86_function *p)
+int x86_jmp_forward( struct x86_function *p)
{
+ DUMP();
emit_1ub(p, 0xe9);
emit_1i(p, 0);
return x86_get_label(p);
}
-unsigned char *x86_call_forward( struct x86_function *p)
+int x86_call_forward( struct x86_function *p)
{
+ DUMP();
+
emit_1ub(p, 0xe8);
emit_1i(p, 0);
return x86_get_label(p);
@@ -292,34 +413,24 @@ unsigned char *x86_call_forward( struct x86_function *p)
/* Fixup offset from forward jump:
*/
void x86_fixup_fwd_jump( struct x86_function *p,
- unsigned char *fixup )
+ int fixup )
{
- *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup );
+ *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup;
}
-void x86_jmp( struct x86_function *p, unsigned char *label)
+void x86_jmp( struct x86_function *p, int label)
{
+ DUMP_I( label );
emit_1ub(p, 0xe9);
- emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4);
+ emit_1i(p, label - x86_get_label(p) - 4);
}
-#if 0
-/* This doesn't work once we start reallocating & copying the
- * generated code on buffer fills, because the call is relative to the
- * current pc.
- */
-void x86_call( struct x86_function *p, void (*label)())
-{
- emit_1ub(p, 0xe8);
- emit_1i(p, cptr(label) - x86_get_label(p) - 4);
-}
-#else
void x86_call( struct x86_function *p, struct x86_reg reg)
{
+ DUMP_R( reg );
emit_1ub(p, 0xff);
- emit_modrm(p, reg, reg);
+ emit_modrm_noreg(p, 2, reg);
}
-#endif
/* michal:
@@ -328,6 +439,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg)
*/
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
+ DUMP_RI( dst, imm );
assert(dst.mod == mod_REG);
emit_1ub(p, 0xb8 + dst.idx);
emit_1i(p, imm);
@@ -336,14 +448,23 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
void x86_push( struct x86_function *p,
struct x86_reg reg )
{
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x50 + reg.idx);
+ DUMP_R( reg );
+ if (reg.mod == mod_REG)
+ emit_1ub(p, 0x50 + reg.idx);
+ else
+ {
+ emit_1ub(p, 0xff);
+ emit_modrm_noreg(p, 6, reg);
+ }
+
+
p->stack_offset += 4;
}
void x86_pop( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x58 + reg.idx);
p->stack_offset -= 4;
@@ -352,6 +473,7 @@ void x86_pop( struct x86_function *p,
void x86_inc( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x40 + reg.idx);
}
@@ -359,17 +481,27 @@ void x86_inc( struct x86_function *p,
void x86_dec( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x48 + reg.idx);
}
void x86_ret( struct x86_function *p )
{
+ DUMP();
+ assert(p->stack_offset == 0);
emit_1ub(p, 0xc3);
}
+void x86_retw( struct x86_function *p, unsigned short imm )
+{
+ DUMP();
+ emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff);
+}
+
void x86_sahf( struct x86_function *p )
{
+ DUMP();
emit_1ub(p, 0x9e);
}
@@ -377,6 +509,7 @@ void x86_mov( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
@@ -384,6 +517,7 @@ void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x33, 0x31, dst, src );
}
@@ -391,6 +525,7 @@ void x86_cmp( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x3b, 0x39, dst, src );
}
@@ -398,6 +533,7 @@ void x86_lea( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, 0x8d);
emit_modrm( p, dst, src );
}
@@ -406,6 +542,7 @@ void x86_test( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, 0x85);
emit_modrm( p, dst, src );
}
@@ -414,20 +551,36 @@ void x86_add( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm(p, 0x03, 0x01, dst, src );
}
+/* Calculate EAX * src, results in EDX:EAX.
+ */
void x86_mul( struct x86_function *p,
struct x86_reg src )
{
- assert (src.file == file_REG32 && src.mod == mod_REG);
- emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
+ DUMP_R( src );
+ emit_1ub(p, 0xf7);
+ emit_modrm_noreg(p, 4, src );
+}
+
+
+void x86_imul( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, X86_TWOB, 0xAF);
+ emit_modrm(p, dst, src);
}
+
void x86_sub( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm(p, 0x2b, 0x29, dst, src );
}
@@ -435,6 +588,7 @@ void x86_or( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x0b, 0x09, dst, src );
}
@@ -442,6 +596,7 @@ void x86_and( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x23, 0x21, dst, src );
}
@@ -456,6 +611,7 @@ void sse_movss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, 0xF3, X86_TWOB);
emit_op_modrm( p, 0x10, 0x11, dst, src );
}
@@ -464,6 +620,7 @@ void sse_movaps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x28, 0x29, dst, src );
}
@@ -472,6 +629,7 @@ void sse_movups( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x10, 0x11, dst, src );
}
@@ -480,6 +638,7 @@ void sse_movhps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod != mod_REG || src.mod != mod_REG);
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
@@ -489,6 +648,7 @@ void sse_movlps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod != mod_REG || src.mod != mod_REG);
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
@@ -498,6 +658,7 @@ void sse_maxps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5F);
emit_modrm( p, dst, src );
}
@@ -506,6 +667,7 @@ void sse_maxss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
emit_modrm( p, dst, src );
}
@@ -514,6 +676,7 @@ void sse_divss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
emit_modrm( p, dst, src );
}
@@ -522,6 +685,7 @@ void sse_minps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5D);
emit_modrm( p, dst, src );
}
@@ -530,6 +694,7 @@ void sse_subps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5C);
emit_modrm( p, dst, src );
}
@@ -538,6 +703,7 @@ void sse_mulps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x59);
emit_modrm( p, dst, src );
}
@@ -546,6 +712,7 @@ void sse_mulss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x59);
emit_modrm( p, dst, src );
}
@@ -554,6 +721,7 @@ void sse_addps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x58);
emit_modrm( p, dst, src );
}
@@ -562,6 +730,7 @@ void sse_addss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x58);
emit_modrm( p, dst, src );
}
@@ -570,6 +739,7 @@ void sse_andnps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x55);
emit_modrm( p, dst, src );
}
@@ -578,6 +748,7 @@ void sse_andps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x54);
emit_modrm( p, dst, src );
}
@@ -586,6 +757,7 @@ void sse_rsqrtps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x52);
emit_modrm( p, dst, src );
}
@@ -594,6 +766,7 @@ void sse_rsqrtss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x52);
emit_modrm( p, dst, src );
@@ -603,6 +776,7 @@ void sse_movhlps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod == mod_REG && src.mod == mod_REG);
emit_2ub(p, X86_TWOB, 0x12);
emit_modrm( p, dst, src );
@@ -612,6 +786,7 @@ void sse_movlhps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod == mod_REG && src.mod == mod_REG);
emit_2ub(p, X86_TWOB, 0x16);
emit_modrm( p, dst, src );
@@ -621,6 +796,7 @@ void sse_orps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x56);
emit_modrm( p, dst, src );
}
@@ -629,6 +805,7 @@ void sse_xorps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x57);
emit_modrm( p, dst, src );
}
@@ -637,6 +814,7 @@ void sse_cvtps2pi( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_XMM || src.mod != mod_REG));
@@ -646,36 +824,62 @@ void sse_cvtps2pi( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_cvtdq2ps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, X86_TWOB, 0x5b);
+ emit_modrm( p, dst, src );
+}
+
/* Shufps can also be used to implement a reduced swizzle when dest ==
* arg0.
*/
void sse_shufps( struct x86_function *p,
- struct x86_reg dest,
- struct x86_reg arg0,
+ struct x86_reg dst,
+ struct x86_reg src,
unsigned char shuf)
{
+ DUMP_RRI( dst, src, shuf );
emit_2ub(p, X86_TWOB, 0xC6);
- emit_modrm(p, dest, arg0);
+ emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
+void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub( p, X86_TWOB, 0x15 );
+ emit_modrm( p, dst, src );
+}
+
+void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub( p, X86_TWOB, 0x14 );
+ emit_modrm( p, dst, src );
+}
+
void sse_cmpps( struct x86_function *p,
- struct x86_reg dest,
- struct x86_reg arg0,
+ struct x86_reg dst,
+ struct x86_reg src,
unsigned char cc)
{
+ DUMP_RRI( dst, src, cc );
emit_2ub(p, X86_TWOB, 0xC2);
- emit_modrm(p, dest, arg0);
+ emit_modrm(p, dst, src);
emit_1ub(p, cc);
}
void sse_pmovmskb( struct x86_function *p,
- struct x86_reg dest,
+ struct x86_reg dst,
struct x86_reg src)
{
- emit_3ub(p, 0x66, X86_TWOB, 0xD7);
- emit_modrm(p, dest, src);
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, X86_TWOB, 0xD7);
+ emit_modrm(p, dst, src);
}
/***********************************************************************
@@ -686,12 +890,13 @@ void sse_pmovmskb( struct x86_function *p,
* Perform a reduced swizzle:
*/
void sse2_pshufd( struct x86_function *p,
- struct x86_reg dest,
- struct x86_reg arg0,
+ struct x86_reg dst,
+ struct x86_reg src,
unsigned char shuf)
{
+ DUMP_RRI( dst, src, shuf );
emit_3ub(p, 0x66, X86_TWOB, 0x70);
- emit_modrm(p, dest, arg0);
+ emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
@@ -699,6 +904,7 @@ void sse2_cvttps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
emit_modrm( p, dst, src );
}
@@ -707,6 +913,7 @@ void sse2_cvtps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x5B);
emit_modrm( p, dst, src );
}
@@ -715,6 +922,7 @@ void sse2_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x6B);
emit_modrm( p, dst, src );
}
@@ -723,6 +931,7 @@ void sse2_packsswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x63);
emit_modrm( p, dst, src );
}
@@ -731,14 +940,26 @@ void sse2_packuswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x67);
emit_modrm( p, dst, src );
}
+void sse2_punpcklbw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, X86_TWOB, 0x60);
+ emit_modrm( p, dst, src );
+}
+
+
void sse2_rcpps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x53);
emit_modrm( p, dst, src );
}
@@ -747,6 +968,7 @@ void sse2_rcpss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x53);
emit_modrm( p, dst, src );
}
@@ -755,6 +977,7 @@ void sse2_movd( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, 0x66, X86_TWOB);
emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}
@@ -767,30 +990,35 @@ void sse2_movd( struct x86_function *p,
*/
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 2, dst);
}
void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 3, dst);
}
void x87_fild( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
emit_1ub(p, 0xdf);
emit_modrm_noreg(p, 0, arg);
}
void x87_fldz( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xee);
}
void x87_fldcw( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_REG32);
assert(arg.mod != mod_REG);
emit_1ub(p, 0xd9);
@@ -799,26 +1027,31 @@ void x87_fldcw( struct x86_function *p, struct x86_reg arg )
void x87_fld1( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xe8);
}
void x87_fldl2e( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xea);
}
void x87_fldln2( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xed);
}
void x87_fwait( struct x86_function *p )
{
+ DUMP();
emit_1ub(p, 0x9b);
}
void x87_fnclex( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xdb, 0xe2);
}
@@ -855,49 +1088,55 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86
assert(0);
}
-void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xc8,
0xdc, 0xc8,
4);
}
-void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xe0,
0xdc, 0xe8,
4);
}
-void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xe8,
0xdc, 0xe0,
5);
}
-void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xc0,
0xdc, 0xc0,
0);
}
-void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xf0,
0xdc, 0xf8,
6);
}
-void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xf8,
0xdc, 0xf0,
7);
@@ -905,6 +1144,7 @@ void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
void x87_fmulp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc8+dst.idx);
@@ -912,6 +1152,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst )
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe8+dst.idx);
@@ -919,6 +1160,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst )
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe0+dst.idx);
@@ -926,6 +1168,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
void x87_faddp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc0+dst.idx);
@@ -933,6 +1176,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst )
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf8+dst.idx);
@@ -940,6 +1184,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst )
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf0+dst.idx);
@@ -947,70 +1192,83 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
void x87_fucom( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe0+arg.idx);
}
void x87_fucomp( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe8+arg.idx);
}
void x87_fucompp( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xda, 0xe9);
}
void x87_fxch( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xd9, 0xc8+arg.idx);
}
void x87_fabs( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xe1);
}
void x87_fchs( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xe0);
}
void x87_fcos( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xff);
}
void x87_fprndint( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfc);
}
void x87_fscale( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfd);
}
void x87_fsin( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfe);
}
void x87_fsincos( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfb);
}
void x87_fsqrt( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfa);
}
void x87_fxtract( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf4);
}
@@ -1020,6 +1278,7 @@ void x87_fxtract( struct x86_function *p )
*/
void x87_f2xm1( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf0);
}
@@ -1028,6 +1287,7 @@ void x87_f2xm1( struct x86_function *p )
*/
void x87_fyl2x( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf1);
}
@@ -1038,12 +1298,14 @@ void x87_fyl2x( struct x86_function *p )
*/
void x87_fyl2xp1( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf9);
}
void x87_fld( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
if (arg.file == file_x87)
emit_2ub(p, 0xd9, 0xc0 + arg.idx);
else {
@@ -1054,6 +1316,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg )
void x87_fst( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xdd, 0xd0 + dst.idx);
else {
@@ -1064,6 +1327,7 @@ void x87_fst( struct x86_function *p, struct x86_reg dst )
void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xdd, 0xd8 + dst.idx);
else {
@@ -1074,6 +1338,7 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst )
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xd8, 0xd0 + dst.idx);
else {
@@ -1084,6 +1349,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst )
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xd8, 0xd8 + dst.idx);
else {
@@ -1095,6 +1361,7 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst )
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_REG32);
if (dst.idx == reg_AX &&
@@ -1115,6 +1382,7 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
void mmx_emms( struct x86_function *p )
{
+ DUMP();
assert(p->need_emms);
emit_2ub(p, 0x0f, 0x77);
p->need_emms = 0;
@@ -1124,6 +1392,7 @@ void mmx_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_MMX || src.mod != mod_REG));
@@ -1137,6 +1406,7 @@ void mmx_packuswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_MMX || src.mod != mod_REG));
@@ -1150,6 +1420,7 @@ void mmx_movd( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
p->need_emms = 1;
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x6e, 0x7e, dst, src );
@@ -1159,6 +1430,7 @@ void mmx_movq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
p->need_emms = 1;
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x6f, 0x7f, dst, src );
@@ -1186,18 +1458,25 @@ void x86_init_func( struct x86_function *p )
p->size = 0;
p->store = NULL;
p->csr = p->store;
+ DUMP_START();
}
void x86_init_func_size( struct x86_function *p, unsigned code_size )
{
p->size = code_size;
p->store = rtasm_exec_malloc(code_size);
+ if (p->store == NULL) {
+ p->store = p->error_overflow;
+ }
p->csr = p->store;
+ DUMP_START();
}
void x86_release_func( struct x86_function *p )
{
- rtasm_exec_free(p->store);
+ if (p->store && p->store != p->error_overflow)
+ rtasm_exec_free(p->store);
+
p->store = NULL;
p->csr = NULL;
p->size = 0;
@@ -1206,9 +1485,14 @@ void x86_release_func( struct x86_function *p )
void (*x86_get_func( struct x86_function *p ))(void)
{
+ DUMP_END();
if (DISASSEM && p->store)
debug_printf("disassemble %p %p\n", p->store, p->csr);
- return (void (*)(void)) p->store;
+
+ if (p->store == p->error_overflow)
+ return (void (*)(void)) NULL;
+ else
+ return (void (*)(void)) p->store;
}
#else
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 606b41eb35..eacaeeaf6f 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -43,6 +43,7 @@ struct x86_function {
unsigned char *csr;
unsigned stack_offset;
int need_emms;
+ unsigned char error_overflow[4];
const char *fn;
};
@@ -123,23 +124,23 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
/* Labels, jumps and fixup:
*/
-unsigned char *x86_get_label( struct x86_function *p );
+int x86_get_label( struct x86_function *p );
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
- unsigned char *label );
+ int label );
-unsigned char *x86_jcc_forward( struct x86_function *p,
+int x86_jcc_forward( struct x86_function *p,
enum x86_cc cc );
-unsigned char *x86_jmp_forward( struct x86_function *p);
+int x86_jmp_forward( struct x86_function *p);
-unsigned char *x86_call_forward( struct x86_function *p);
+int x86_call_forward( struct x86_function *p);
void x86_fixup_fwd_jump( struct x86_function *p,
- unsigned char *fixup );
+ int fixup );
-void x86_jmp( struct x86_function *p, unsigned char *label );
+void x86_jmp( struct x86_function *p, int label );
/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call( struct x86_function *p, struct x86_reg reg);
@@ -165,6 +166,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg sr
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -201,7 +203,10 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
+void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
+void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -211,10 +216,12 @@ void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mul( struct x86_function *p, struct x86_reg src );
+void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
void x86_ret( struct x86_function *p );
+void x86_retw( struct x86_function *p, unsigned short imm );
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c
index 97ee5882a1..5e4126e014 100644
--- a/src/gallium/auxiliary/sct/sct.c
+++ b/src/gallium/auxiliary/sct/sct.c
@@ -209,6 +209,7 @@ remove_context_from_surface(struct sct_surface *si,
}
else {
prev = curr;
+ next = curr->next;
}
}
}
diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile
new file mode 100644
index 0000000000..451911a354
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/exec/Makefile
@@ -0,0 +1,2 @@
+default:
+ cd .. ; make
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 78e7dec569..826b432f09 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -88,6 +88,10 @@
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
+#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
+#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
+#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
#define FOR_EACH_CHANNEL(CHAN)\
@@ -262,6 +266,8 @@ tgsi_exec_machine_init(
mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
+ mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
+ mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
}
}
@@ -287,10 +293,10 @@ micro_abs(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) fabs( (double) src->f[0] );
- dst->f[1] = (float) fabs( (double) src->f[1] );
- dst->f[2] = (float) fabs( (double) src->f[2] );
- dst->f[3] = (float) fabs( (double) src->f[3] );
+ dst->f[0] = fabsf( src->f[0] );
+ dst->f[1] = fabsf( src->f[1] );
+ dst->f[2] = fabsf( src->f[2] );
+ dst->f[3] = fabsf( src->f[3] );
}
static void
@@ -334,10 +340,10 @@ micro_ceil(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) ceil( (double) src->f[0] );
- dst->f[1] = (float) ceil( (double) src->f[1] );
- dst->f[2] = (float) ceil( (double) src->f[2] );
- dst->f[3] = (float) ceil( (double) src->f[3] );
+ dst->f[0] = ceilf( src->f[0] );
+ dst->f[1] = ceilf( src->f[1] );
+ dst->f[2] = ceilf( src->f[2] );
+ dst->f[3] = ceilf( src->f[3] );
}
static void
@@ -345,10 +351,10 @@ micro_cos(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) cos( (double) src->f[0] );
- dst->f[1] = (float) cos( (double) src->f[1] );
- dst->f[2] = (float) cos( (double) src->f[2] );
- dst->f[3] = (float) cos( (double) src->f[3] );
+ dst->f[0] = cosf( src->f[0] );
+ dst->f[1] = cosf( src->f[1] );
+ dst->f[2] = cosf( src->f[2] );
+ dst->f[3] = cosf( src->f[3] );
}
static void
@@ -430,10 +436,10 @@ micro_exp2(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
- dst->f[0] = (float) pow( 2.0, (double) src->f[0] );
- dst->f[1] = (float) pow( 2.0, (double) src->f[1] );
- dst->f[2] = (float) pow( 2.0, (double) src->f[2] );
- dst->f[3] = (float) pow( 2.0, (double) src->f[3] );
+ dst->f[0] = powf( 2.0f, src->f[0] );
+ dst->f[1] = powf( 2.0f, src->f[1] );
+ dst->f[2] = powf( 2.0f, src->f[2] );
+ dst->f[3] = powf( 2.0f, src->f[3] );
}
static void
@@ -463,10 +469,10 @@ micro_flr(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) floor( (double) src->f[0] );
- dst->f[1] = (float) floor( (double) src->f[1] );
- dst->f[2] = (float) floor( (double) src->f[2] );
- dst->f[3] = (float) floor( (double) src->f[3] );
+ dst->f[0] = floorf( src->f[0] );
+ dst->f[1] = floorf( src->f[1] );
+ dst->f[2] = floorf( src->f[2] );
+ dst->f[3] = floorf( src->f[3] );
}
static void
@@ -474,10 +480,10 @@ micro_frc(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] );
- dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] );
- dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] );
- dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] );
+ dst->f[0] = src->f[0] - floorf( src->f[0] );
+ dst->f[1] = src->f[1] - floorf( src->f[1] );
+ dst->f[2] = src->f[2] - floorf( src->f[2] );
+ dst->f[3] = src->f[3] - floorf( src->f[3] );
}
static void
@@ -510,10 +516,24 @@ micro_lg2(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f;
- dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f;
- dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f;
- dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f;
+ dst->f[0] = logf( src->f[0] ) * 1.442695f;
+ dst->f[1] = logf( src->f[1] ) * 1.442695f;
+ dst->f[2] = logf( src->f[2] ) * 1.442695f;
+ dst->f[3] = logf( src->f[3] ) * 1.442695f;
+}
+
+static void
+micro_le(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2,
+ const union tgsi_exec_channel *src3 )
+{
+ dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
+ dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
+ dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
+ dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
}
static void
@@ -764,10 +784,10 @@ micro_pow(
const union tgsi_exec_channel *src0,
const union tgsi_exec_channel *src1 )
{
- dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] );
- dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] );
- dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] );
- dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] );
+ dst->f[0] = powf( src0->f[0], src1->f[0] );
+ dst->f[1] = powf( src0->f[1], src1->f[1] );
+ dst->f[2] = powf( src0->f[2], src1->f[2] );
+ dst->f[3] = powf( src0->f[3], src1->f[3] );
}
static void
@@ -775,10 +795,10 @@ micro_rnd(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) );
- dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) );
- dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) );
- dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) );
+ dst->f[0] = floorf( src->f[0] + 0.5f );
+ dst->f[1] = floorf( src->f[1] + 0.5f );
+ dst->f[2] = floorf( src->f[2] + 0.5f );
+ dst->f[3] = floorf( src->f[3] + 0.5f );
}
static void
@@ -833,20 +853,20 @@ micro_sin(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) sin( (double) src->f[0] );
- dst->f[1] = (float) sin( (double) src->f[1] );
- dst->f[2] = (float) sin( (double) src->f[2] );
- dst->f[3] = (float) sin( (double) src->f[3] );
+ dst->f[0] = sinf( src->f[0] );
+ dst->f[1] = sinf( src->f[1] );
+ dst->f[2] = sinf( src->f[2] );
+ dst->f[3] = sinf( src->f[3] );
}
static void
micro_sqrt( union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) sqrt( (double) src->f[0] );
- dst->f[1] = (float) sqrt( (double) src->f[1] );
- dst->f[2] = (float) sqrt( (double) src->f[2] );
- dst->f[3] = (float) sqrt( (double) src->f[3] );
+ dst->f[0] = sqrtf( src->f[0] );
+ dst->f[1] = sqrtf( src->f[1] );
+ dst->f[2] = sqrtf( src->f[2] );
+ dst->f[3] = sqrtf( src->f[3] );
}
static void
@@ -1516,41 +1536,44 @@ exec_instruction(
break;
case TGSI_OPCODE_EXP:
- debug_printf("TGSI: EXP opcode not implemented\n");
- /* from ARB_v_p:
- tmp = ScalarLoad(op0);
- result.x = 2^floor(tmp);
- result.y = tmp - floor(tmp);
- result.z = RoughApprox2ToX(tmp);
- result.w = 1.0;
- */
-#if 0
- /* something like this: */
FETCH( &r[0], 0, CHAN_X );
- micro_exp2( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
+ STORE( &r[2], 0, CHAN_X ); /* store r2 */
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
+ STORE( &r[2], 0, CHAN_Y ); /* store r2 */
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
+ STORE( &r[2], 0, CHAN_Z ); /* store r2 */
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
-#endif
break;
case TGSI_OPCODE_LOG:
- debug_printf("TGSI: LOG opcode not implemented\n");
- /* from ARB_v_p:
- tmp = fabs(ScalarLoad(op0));
- result.x = floor(log2(tmp));
- result.y = tmp / 2^(floor(log2(tmp)));
- result.z = RoughApproxLog2(tmp);
- result.w = 1.0;
- */
-#if 0
- /* something like this: */
FETCH( &r[0], 0, CHAN_X );
- micro_lg2( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
+ micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
+ micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &r[0], 0, CHAN_X );
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
+ micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
+ STORE( &r[0], 0, CHAN_Y );
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ STORE( &r[1], 0, CHAN_Z );
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
-#endif
break;
case TGSI_OPCODE_MUL:
@@ -1975,7 +1998,7 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
STORE( &r[0], 0, chan_index );
}
break;
@@ -1992,7 +2015,7 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
STORE( &r[0], 0, chan_index );
}
break;
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
index 45c49dd007..19bd78df3d 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -133,9 +133,15 @@ struct tgsi_exec_labels
#define TGSI_EXEC_TEMP_PRIMITIVE_I 34
#define TGSI_EXEC_TEMP_PRIMITIVE_C 2
-#define TGSI_EXEC_TEMP_R0 35
+#define TGSI_EXEC_TEMP_THREE_I 34
+#define TGSI_EXEC_TEMP_THREE_C 3
-#define TGSI_EXEC_NUM_TEMPS (32 + 4)
+#define TGSI_EXEC_TEMP_HALF_I 35
+#define TGSI_EXEC_TEMP_HALF_C 0
+
+#define TGSI_EXEC_TEMP_R0 36
+
+#define TGSI_EXEC_NUM_TEMPS (32 + 5)
#define TGSI_EXEC_NUM_ADDRS 1
#define TGSI_EXEC_NUM_IMMEDIATES 256
@@ -166,7 +172,7 @@ struct tgsi_exec_machine
float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
unsigned ImmLimit;
- float (*Consts)[4];
+ const float (*Consts)[4];
struct tgsi_exec_vector *Inputs;
struct tgsi_exec_vector *Outputs;
const struct tgsi_token *Tokens;
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 4e80597b3f..8018bd7fa4 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -34,115 +34,14 @@
#include "rtasm/rtasm_x86sse.h"
-#if defined(__i386__) || defined(__386__)
+#ifdef PIPE_ARCH_X86
-#define DUMP_SSE 0
-
-#if DUMP_SSE
-
-static void
-_print_reg(
- struct x86_reg reg )
-{
- if (reg.mod != mod_REG)
- debug_printf( "[" );
-
- switch( reg.file ) {
- case file_REG32:
- switch( reg.idx ) {
- case reg_AX:
- debug_printf( "EAX" );
- break;
- case reg_CX:
- debug_printf( "ECX" );
- break;
- case reg_DX:
- debug_printf( "EDX" );
- break;
- case reg_BX:
- debug_printf( "EBX" );
- break;
- case reg_SP:
- debug_printf( "ESP" );
- break;
- case reg_BP:
- debug_printf( "EBP" );
- break;
- case reg_SI:
- debug_printf( "ESI" );
- break;
- case reg_DI:
- debug_printf( "EDI" );
- break;
- }
- break;
- case file_MMX:
- assert( 0 );
- break;
- case file_XMM:
- debug_printf( "XMM%u", reg.idx );
- break;
- case file_x87:
- assert( 0 );
- break;
- }
-
- if (reg.mod == mod_DISP8 ||
- reg.mod == mod_DISP32)
- debug_printf("+%d", reg.disp);
-
- if (reg.mod != mod_REG)
- debug_printf( "]" );
-}
-
-static void
-_fill(
- const char *op )
-{
- unsigned count = 10 - strlen( op );
-
- while( count-- ) {
- debug_printf( " " );
- }
-}
-
-#define DUMP_START() debug_printf( "\nsse-dump start ----------------" )
-#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" )
-#define DUMP( OP ) debug_printf( "\n%s", OP )
-#define DUMP_I( OP, I ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- debug_printf( "%u", I ); } while( 0 )
-#define DUMP_R( OP, R0 ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- _print_reg( R0 ); } while( 0 )
-#define DUMP_RR( OP, R0, R1 ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- _print_reg( R0 );\
- debug_printf( ", " );\
- _print_reg( R1 ); } while( 0 )
-#define DUMP_RRI( OP, R0, R1, I ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- _print_reg( R0 );\
- debug_printf( ", " );\
- _print_reg( R1 );\
- debug_printf( ", " );\
- debug_printf( "%u", I ); } while( 0 )
-
-#else
-
-#define DUMP_START()
-#define DUMP_END()
-#define DUMP( OP )
-#define DUMP_I( OP, I )
-#define DUMP_R( OP, R0 )
-#define DUMP_RR( OP, R0, R1 )
-#define DUMP_RRI( OP, R0, R1, I )
+/* for 1/sqrt()
+ *
+ * This costs about 100fps (close to 10%) in gears:
+ */
+#define HIGH_PRECISION 1
-#endif
#define FOR_EACH_CHANNEL( CHAN )\
for( CHAN = 0; CHAN < 4; CHAN++ )
@@ -208,15 +107,9 @@ get_output_base( void )
static struct x86_reg
get_temp_base( void )
{
-#ifdef WIN32
return x86_make_reg(
file_REG32,
reg_BX );
-#else
- return x86_make_reg(
- file_REG32,
- reg_SI );
-#endif
}
static struct x86_reg
@@ -225,17 +118,28 @@ get_coef_base( void )
return get_output_base();
}
+static struct x86_reg
+get_immediate_base( void )
+{
+ return x86_make_reg(
+ file_REG32,
+ reg_DI );
+}
+
+
/**
* Data access helpers.
*/
+
static struct x86_reg
-get_argument(
- unsigned index )
+get_immediate(
+ unsigned vec,
+ unsigned chan )
{
return x86_make_disp(
- x86_make_reg( file_REG32, reg_SP ),
- (index + 1) * 4 );
+ get_immediate_base(),
+ (vec * 4 + chan) * 4 );
}
static struct x86_reg
@@ -289,200 +193,6 @@ get_coef(
((vec * 3 + member) * 4 + chan) * 4 );
}
-/**
- * X86 rtasm wrappers.
- */
-
-static void
-emit_addps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ADDPS", dst, src );
- sse_addps( func, dst, src );
-}
-
-static void
-emit_andnps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ANDNPS", dst, src );
- sse_andnps( func, dst, src );
-}
-
-static void
-emit_andps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ANDPS", dst, src );
- sse_andps( func, dst, src );
-}
-
-static void
-emit_call(
- struct x86_function *func,
- void (* addr)() )
-{
- struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-
- DUMP_I( "CALL", addr );
- x86_mov_reg_imm( func, ecx, (unsigned long) addr );
- x86_call( func, ecx );
-}
-
-static void
-emit_cmpps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src,
- enum sse_cc cc )
-{
- DUMP_RRI( "CMPPS", dst, src, cc );
- sse_cmpps( func, dst, src, cc );
-}
-
-static void
-emit_cvttps2dq(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "CVTTPS2DQ", dst, src );
- sse2_cvttps2dq( func, dst, src );
-}
-
-static void
-emit_maxps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MAXPS", dst, src );
- sse_maxps( func, dst, src );
-}
-
-static void
-emit_minps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MINPS", dst, src );
- sse_minps( func, dst, src );
-}
-
-static void
-emit_mov(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOV", dst, src );
- x86_mov( func, dst, src );
-}
-
-static void
-emit_movaps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOVAPS", dst, src );
- sse_movaps( func, dst, src );
-}
-
-static void
-emit_movss(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOVSS", dst, src );
- sse_movss( func, dst, src );
-}
-
-static void
-emit_movups(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOVUPS", dst, src );
- sse_movups( func, dst, src );
-}
-
-static void
-emit_mulps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MULPS", dst, src );
- sse_mulps( func, dst, src );
-}
-
-static void
-emit_or(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "OR", dst, src );
- x86_or( func, dst, src );
-}
-
-static void
-emit_orps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ORPS", dst, src );
- sse_orps( func, dst, src );
-}
-
-static void
-emit_pmovmskb(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "PMOVMSKB", dst, src );
- sse_pmovmskb( func, dst, src );
-}
-
-static void
-emit_pop(
- struct x86_function *func,
- struct x86_reg dst )
-{
- DUMP_R( "POP", dst );
- x86_pop( func, dst );
-}
-
-static void
-emit_push(
- struct x86_function *func,
- struct x86_reg dst )
-{
- DUMP_R( "PUSH", dst );
- x86_push( func, dst );
-}
-
-static void
-emit_rcpps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "RCPPS", dst, src );
- sse2_rcpps( func, dst, src );
-}
#ifdef WIN32
static void
@@ -490,7 +200,6 @@ emit_retw(
struct x86_function *func,
unsigned size )
{
- DUMP_I( "RET", size );
x86_retw( func, size );
}
#else
@@ -498,56 +207,21 @@ static void
emit_ret(
struct x86_function *func )
{
- DUMP( "RET" );
x86_ret( func );
}
#endif
-static void
-emit_rsqrtps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "RSQRTPS", dst, src );
- sse_rsqrtps( func, dst, src );
-}
-
-static void
-emit_shufps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src,
- unsigned char shuf )
-{
- DUMP_RRI( "SHUFPS", dst, src, shuf );
- sse_shufps( func, dst, src, shuf );
-}
-
-static void
-emit_subps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "SUBPS", dst, src );
- sse_subps( func, dst, src );
-}
-
-static void
-emit_xorps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "XORPS", dst, src );
- sse_xorps( func, dst, src );
-}
/**
* Data fetch helpers.
*/
+/**
+ * Copy a shader constant to xmm register
+ * \param xmm the destination xmm register
+ * \param vec the src const buffer index
+ * \param chan src channel to fetch (X, Y, Z or W)
+ */
static void
emit_const(
struct x86_function *func,
@@ -555,11 +229,11 @@ emit_const(
unsigned vec,
unsigned chan )
{
- emit_movss(
+ sse_movss(
func,
make_xmm( xmm ),
get_const( vec, chan ) );
- emit_shufps(
+ sse_shufps(
func,
make_xmm( xmm ),
make_xmm( xmm ),
@@ -567,18 +241,49 @@ emit_const(
}
static void
+emit_immediate(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ sse_movss(
+ func,
+ make_xmm( xmm ),
+ get_immediate( vec, chan ) );
+ sse_shufps(
+ func,
+ make_xmm( xmm ),
+ make_xmm( xmm ),
+ SHUF( 0, 0, 0, 0 ) );
+}
+
+
+/**
+ * Copy a shader input to xmm register
+ * \param xmm the destination xmm register
+ * \param vec the src input attrib
+ * \param chan src channel to fetch (X, Y, Z or W)
+ */
+static void
emit_inputf(
struct x86_function *func,
unsigned xmm,
unsigned vec,
unsigned chan )
{
- emit_movups(
+ sse_movups(
func,
make_xmm( xmm ),
get_input( vec, chan ) );
}
+/**
+ * Store an xmm register to a shader output
+ * \param xmm the source xmm register
+ * \param vec the dest output attrib
+ * \param chan src dest channel to store (X, Y, Z or W)
+ */
static void
emit_output(
struct x86_function *func,
@@ -586,12 +291,18 @@ emit_output(
unsigned vec,
unsigned chan )
{
- emit_movups(
+ sse_movups(
func,
get_output( vec, chan ),
make_xmm( xmm ) );
}
+/**
+ * Copy a shader temporary to xmm register
+ * \param xmm the destination xmm register
+ * \param vec the src temp register
+ * \param chan src channel to fetch (X, Y, Z or W)
+ */
static void
emit_tempf(
struct x86_function *func,
@@ -599,12 +310,19 @@ emit_tempf(
unsigned vec,
unsigned chan )
{
- emit_movaps(
+ sse_movaps(
func,
make_xmm( xmm ),
get_temp( vec, chan ) );
}
+/**
+ * Load an xmm register with an input attrib coefficient (a0, dadx or dady)
+ * \param xmm the destination xmm register
+ * \param vec the src input/attribute coefficient index
+ * \param chan src channel to fetch (X, Y, Z or W)
+ * \param member 0=a0, 1=dadx, 2=dady
+ */
static void
emit_coef(
struct x86_function *func,
@@ -613,11 +331,11 @@ emit_coef(
unsigned chan,
unsigned member )
{
- emit_movss(
+ sse_movss(
func,
make_xmm( xmm ),
get_coef( vec, chan, member ) );
- emit_shufps(
+ sse_shufps(
func,
make_xmm( xmm ),
make_xmm( xmm ),
@@ -635,7 +353,7 @@ emit_inputs(
unsigned vec,
unsigned chan )
{
- emit_movups(
+ sse_movups(
func,
get_input( vec, chan ),
make_xmm( xmm ) );
@@ -648,7 +366,7 @@ emit_temps(
unsigned vec,
unsigned chan )
{
- emit_movaps(
+ sse_movaps(
func,
get_temp( vec, chan ),
make_xmm( xmm ) );
@@ -725,41 +443,32 @@ static void
emit_push_gp(
struct x86_function *func )
{
- emit_push(
+ x86_push(
func,
- get_const_base() );
- emit_push(
+ x86_make_reg( file_REG32, reg_AX) );
+ x86_push(
func,
- get_input_base() );
- emit_push(
+ x86_make_reg( file_REG32, reg_CX) );
+ x86_push(
func,
- get_output_base() );
-
- /* It is important on non-win32 platforms that temp base is pushed last.
- */
- emit_push(
- func,
- get_temp_base() );
+ x86_make_reg( file_REG32, reg_DX) );
}
static void
-emit_pop_gp(
+x86_pop_gp(
struct x86_function *func )
{
/* Restore GP registers in a reverse order.
*/
- emit_pop(
- func,
- get_temp_base() );
- emit_pop(
+ x86_pop(
func,
- get_output_base() );
- emit_pop(
+ x86_make_reg( file_REG32, reg_DX) );
+ x86_pop(
func,
- get_input_base() );
- emit_pop(
+ x86_make_reg( file_REG32, reg_CX) );
+ x86_pop(
func,
- get_const_base() );
+ x86_make_reg( file_REG32, reg_AX) );
}
static void
@@ -768,7 +477,7 @@ emit_func_call_dst(
unsigned xmm_dst,
void (*code)() )
{
- emit_movaps(
+ sse_movaps(
func,
get_temp( TEMP_R0, 0 ),
make_xmm( xmm_dst ) );
@@ -776,20 +485,27 @@ emit_func_call_dst(
emit_push_gp(
func );
-#ifdef WIN32
- emit_push(
- func,
- get_temp( TEMP_R0, 0 ) );
+ {
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+
+ x86_lea(
+ func,
+ ecx,
+ get_temp( TEMP_R0, 0 ) );
+
+ x86_push( func, ecx );
+ x86_mov_reg_imm( func, ecx, (unsigned long) code );
+ x86_call( func, ecx );
+#ifndef WIN32
+ x86_pop(func, ecx );
#endif
+ }
- emit_call(
- func,
- code );
- emit_pop_gp(
+ x86_pop_gp(
func );
- emit_movaps(
+ sse_movaps(
func,
make_xmm( xmm_dst ),
get_temp( TEMP_R0, 0 ) );
@@ -802,7 +518,7 @@ emit_func_call_dst_src(
unsigned xmm_src,
void (*code)() )
{
- emit_movaps(
+ sse_movaps(
func,
get_temp( TEMP_R0, 1 ),
make_xmm( xmm_src ) );
@@ -822,7 +538,7 @@ emit_abs(
struct x86_function *func,
unsigned xmm )
{
- emit_andps(
+ sse_andps(
func,
make_xmm( xmm ),
get_temp(
@@ -836,7 +552,7 @@ emit_add(
unsigned xmm_dst,
unsigned xmm_src )
{
- emit_addps(
+ sse_addps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
@@ -846,18 +562,12 @@ static void XSTDCALL
cos4f(
float *store )
{
-#ifdef WIN32
- store[0] = (float) cos( (double) store[0] );
- store[1] = (float) cos( (double) store[1] );
- store[2] = (float) cos( (double) store[2] );
- store[3] = (float) cos( (double) store[3] );
-#else
- const unsigned X = TEMP_R0 * 16;
+ const unsigned X = 0;
+
store[X + 0] = cosf( store[X + 0] );
store[X + 1] = cosf( store[X + 1] );
store[X + 2] = cosf( store[X + 2] );
store[X + 3] = cosf( store[X + 3] );
-#endif
}
static void
@@ -875,18 +585,12 @@ static void XSTDCALL
ex24f(
float *store )
{
-#ifdef WIN32
- store[0] = (float) pow( 2.0, (double) store[0] );
- store[1] = (float) pow( 2.0, (double) store[1] );
- store[2] = (float) pow( 2.0, (double) store[2] );
- store[3] = (float) pow( 2.0, (double) store[3] );
-#else
- const unsigned X = TEMP_R0 * 16;
+ const unsigned X = 0;
+
store[X + 0] = powf( 2.0f, store[X + 0] );
store[X + 1] = powf( 2.0f, store[X + 1] );
store[X + 2] = powf( 2.0f, store[X + 2] );
store[X + 3] = powf( 2.0f, store[X + 3] );
-#endif
}
static void
@@ -905,7 +609,7 @@ emit_f2it(
struct x86_function *func,
unsigned xmm )
{
- emit_cvttps2dq(
+ sse2_cvttps2dq(
func,
make_xmm( xmm ),
make_xmm( xmm ) );
@@ -915,15 +619,12 @@ static void XSTDCALL
flr4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
- store[X + 0] = (float) floor( (double) store[X + 0] );
- store[X + 1] = (float) floor( (double) store[X + 1] );
- store[X + 2] = (float) floor( (double) store[X + 2] );
- store[X + 3] = (float) floor( (double) store[X + 3] );
+
+ store[X + 0] = floorf( store[X + 0] );
+ store[X + 1] = floorf( store[X + 1] );
+ store[X + 2] = floorf( store[X + 2] );
+ store[X + 3] = floorf( store[X + 3] );
}
static void
@@ -941,15 +642,12 @@ static void XSTDCALL
frc4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
- store[X + 0] -= (float) floor( (double) store[X + 0] );
- store[X + 1] -= (float) floor( (double) store[X + 1] );
- store[X + 2] -= (float) floor( (double) store[X + 2] );
- store[X + 3] -= (float) floor( (double) store[X + 3] );
+
+ store[X + 0] -= floorf( store[X + 0] );
+ store[X + 1] -= floorf( store[X + 1] );
+ store[X + 2] -= floorf( store[X + 2] );
+ store[X + 3] -= floorf( store[X + 3] );
}
static void
@@ -967,11 +665,8 @@ static void XSTDCALL
lg24f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = LOG2( store[X + 0] );
store[X + 1] = LOG2( store[X + 1] );
store[X + 2] = LOG2( store[X + 2] );
@@ -995,7 +690,7 @@ emit_MOV(
unsigned xmm_dst,
unsigned xmm_src )
{
- emit_movups(
+ sse_movups(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
@@ -1006,7 +701,7 @@ emit_mul (struct x86_function *func,
unsigned xmm_dst,
unsigned xmm_src)
{
- emit_mulps(
+ sse_mulps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
@@ -1017,7 +712,7 @@ emit_neg(
struct x86_function *func,
unsigned xmm )
{
- emit_xorps(
+ sse_xorps(
func,
make_xmm( xmm ),
get_temp(
@@ -1029,18 +724,12 @@ static void XSTDCALL
pow4f(
float *store )
{
-#ifdef WIN32
- store[0] = (float) pow( (double) store[0], (double) store[4] );
- store[1] = (float) pow( (double) store[1], (double) store[5] );
- store[2] = (float) pow( (double) store[2], (double) store[6] );
- store[3] = (float) pow( (double) store[3], (double) store[7] );
-#else
- const unsigned X = TEMP_R0 * 16;
+ const unsigned X = 0;
+
store[X + 0] = powf( store[X + 0], store[X + 4] );
store[X + 1] = powf( store[X + 1], store[X + 5] );
store[X + 2] = powf( store[X + 2], store[X + 6] );
store[X + 3] = powf( store[X + 3], store[X + 7] );
-#endif
}
static void
@@ -1062,7 +751,11 @@ emit_rcp (
unsigned xmm_dst,
unsigned xmm_src )
{
- emit_rcpps(
+ /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+ * good enough. Need to either emit a proper divide or use the
+ * iterative technique described below in emit_rsqrt().
+ */
+ sse2_rcpps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
@@ -1074,10 +767,44 @@ emit_rsqrt(
unsigned xmm_dst,
unsigned xmm_src )
{
- emit_rsqrtps(
+#if HIGH_PRECISION
+ /* Although rsqrtps() and rcpps() are low precision on some/all SSE
+ * implementations, it is possible to improve its precision at
+ * fairly low cost, using a newton/raphson step, as below:
+ *
+ * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
+ * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
+ *
+ * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+ {
+ struct x86_reg dst = make_xmm( xmm_dst );
+ struct x86_reg src = make_xmm( xmm_src );
+ struct x86_reg tmp0 = make_xmm( 2 );
+ struct x86_reg tmp1 = make_xmm( 3 );
+
+ assert( xmm_dst != xmm_src );
+ assert( xmm_dst != 2 && xmm_dst != 3 );
+ assert( xmm_src != 2 && xmm_src != 3 );
+
+ sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
+ sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
+ sse_rsqrtps( func, tmp1, src );
+ sse_mulps( func, src, tmp1 );
+ sse_mulps( func, dst, tmp1 );
+ sse_mulps( func, src, tmp1 );
+ sse_subps( func, tmp0, src );
+ sse_mulps( func, dst, tmp0 );
+ }
+#else
+ /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+ * good enough.
+ */
+ sse_rsqrtps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
+#endif
}
static void
@@ -1085,7 +812,7 @@ emit_setsign(
struct x86_function *func,
unsigned xmm )
{
- emit_orps(
+ sse_orps(
func,
make_xmm( xmm ),
get_temp(
@@ -1097,18 +824,12 @@ static void XSTDCALL
sin4f(
float *store )
{
-#ifdef WIN32
- store[0] = (float) sin( (double) store[0] );
- store[1] = (float) sin( (double) store[1] );
- store[2] = (float) sin( (double) store[2] );
- store[3] = (float) sin( (double) store[3] );
-#else
- const unsigned X = TEMP_R0 * 16;
+ const unsigned X = 0;
+
store[X + 0] = sinf( store[X + 0] );
store[X + 1] = sinf( store[X + 1] );
store[X + 2] = sinf( store[X + 2] );
store[X + 3] = sinf( store[X + 3] );
-#endif
}
static void
@@ -1127,7 +848,7 @@ emit_sub(
unsigned xmm_dst,
unsigned xmm_src )
{
- emit_subps(
+ sse_subps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
@@ -1160,6 +881,14 @@ emit_fetch(
swizzle );
break;
+ case TGSI_FILE_IMMEDIATE:
+ emit_immediate(
+ func,
+ xmm,
+ reg->SrcRegister.Index,
+ swizzle );
+ break;
+
case TGSI_FILE_INPUT:
emit_inputf(
func,
@@ -1328,16 +1057,16 @@ emit_kil(
}
}
- emit_push(
+ x86_push(
func,
x86_make_reg( file_REG32, reg_AX ) );
- emit_push(
+ x86_push(
func,
x86_make_reg( file_REG32, reg_DX ) );
FOR_EACH_CHANNEL( chan_index ) {
if( uniquemask & (1 << chan_index) ) {
- emit_cmpps(
+ sse_cmpps(
func,
make_xmm( registers[chan_index] ),
get_temp(
@@ -1346,17 +1075,17 @@ emit_kil(
cc_LessThan );
if( chan_index == firstchan ) {
- emit_pmovmskb(
+ sse_pmovmskb(
func,
x86_make_reg( file_REG32, reg_AX ),
make_xmm( registers[chan_index] ) );
}
else {
- emit_pmovmskb(
+ sse_pmovmskb(
func,
x86_make_reg( file_REG32, reg_DX ),
make_xmm( registers[chan_index] ) );
- emit_or(
+ x86_or(
func,
x86_make_reg( file_REG32, reg_AX ),
x86_make_reg( file_REG32, reg_DX ) );
@@ -1364,17 +1093,17 @@ emit_kil(
}
}
- emit_or(
+ x86_or(
func,
get_temp(
TGSI_EXEC_TEMP_KILMASK_I,
TGSI_EXEC_TEMP_KILMASK_C ),
x86_make_reg( file_REG32, reg_AX ) );
- emit_pop(
+ x86_pop(
func,
x86_make_reg( file_REG32, reg_DX ) );
- emit_pop(
+ x86_pop(
func,
x86_make_reg( file_REG32, reg_AX ) );
}
@@ -1390,12 +1119,12 @@ emit_setcc(
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
- emit_cmpps(
+ sse_cmpps(
func,
make_xmm( 0 ),
make_xmm( 1 ),
cc );
- emit_andps(
+ sse_andps(
func,
make_xmm( 0 ),
get_temp(
@@ -1416,22 +1145,22 @@ emit_cmp(
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
FETCH( func, *inst, 2, 2, chan_index );
- emit_cmpps(
+ sse_cmpps(
func,
make_xmm( 0 ),
get_temp(
TGSI_EXEC_TEMP_00000000_I,
TGSI_EXEC_TEMP_00000000_C ),
cc_LessThan );
- emit_andps(
+ sse_andps(
func,
make_xmm( 1 ),
make_xmm( 0 ) );
- emit_andnps(
+ sse_andnps(
func,
make_xmm( 0 ),
make_xmm( 2 ) );
- emit_orps(
+ sse_orps(
func,
make_xmm( 0 ),
make_xmm( 1 ) );
@@ -1448,11 +1177,16 @@ emit_instruction(
switch( inst->Instruction.Opcode ) {
case TGSI_OPCODE_ARL:
+#if 0
+ /* XXX this isn't working properly (see glean vertProg1 test) */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
emit_f2it( func, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
+#else
+ return 0;
+#endif
break;
case TGSI_OPCODE_MOV:
@@ -1482,7 +1216,7 @@ emit_instruction(
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_maxps(
+ sse_maxps(
func,
make_xmm( 0 ),
get_temp(
@@ -1491,21 +1225,26 @@ emit_instruction(
STORE( func, *inst, 0, 0, CHAN_Y );
}
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ /* XMM[1] = SrcReg[0].yyyy */
FETCH( func, *inst, 1, 0, CHAN_Y );
- emit_maxps(
+ /* XMM[1] = max(XMM[1], 0) */
+ sse_maxps(
func,
make_xmm( 1 ),
get_temp(
TGSI_EXEC_TEMP_00000000_I,
TGSI_EXEC_TEMP_00000000_C ) );
+ /* XMM[2] = SrcReg[0].wwww */
FETCH( func, *inst, 2, 0, CHAN_W );
- emit_minps(
+ /* XMM[2] = min(XMM[2], 128.0) */
+ sse_minps(
func,
make_xmm( 2 ),
get_temp(
TGSI_EXEC_TEMP_128_I,
TGSI_EXEC_TEMP_128_C ) );
- emit_maxps(
+ /* XMM[2] = max(XMM[2], -128.0) */
+ sse_maxps(
func,
make_xmm( 2 ),
get_temp(
@@ -1513,16 +1252,16 @@ emit_instruction(
TGSI_EXEC_TEMP_MINUS_128_C ) );
emit_pow( func, 1, 2 );
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_xorps(
+ sse_xorps(
func,
make_xmm( 2 ),
make_xmm( 2 ) );
- emit_cmpps(
+ sse_cmpps(
func,
make_xmm( 2 ),
make_xmm( 0 ),
cc_LessThanEqual );
- emit_andps(
+ sse_andps(
func,
make_xmm( 2 ),
make_xmm( 1 ) );
@@ -1543,9 +1282,9 @@ emit_instruction(
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_rsqrt( func, 0, 0 );
+ emit_rsqrt( func, 1, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
+ STORE( func, *inst, 1, 0, chan_index );
}
break;
@@ -1644,7 +1383,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
- emit_minps(
+ sse_minps(
func,
make_xmm( 0 ),
make_xmm( 1 ) );
@@ -1656,7 +1395,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
- emit_maxps(
+ sse_maxps(
func,
make_xmm( 0 ),
make_xmm( 1 ) );
@@ -1997,7 +1736,6 @@ emit_instruction(
break;
case TGSI_OPCODE_RET:
- case TGSI_OPCODE_END:
#ifdef WIN32
emit_retw( func, 16 );
#else
@@ -2005,6 +1743,9 @@ emit_instruction(
#endif
break;
+ case TGSI_OPCODE_END:
+ break;
+
case TGSI_OPCODE_SSG:
return 0;
break;
@@ -2020,7 +1761,7 @@ emit_instruction(
STORE( func, *inst, 0, 0, CHAN_X );
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- FETCH( func, *inst, 0, 0, CHAN_Y );
+ FETCH( func, *inst, 0, 0, CHAN_X );
emit_sin( func, 0 );
STORE( func, *inst, 0, 0, CHAN_Y );
}
@@ -2236,149 +1977,289 @@ emit_declaration(
}
}
-unsigned
-tgsi_emit_sse2(
- struct tgsi_token *tokens,
- struct x86_function *func )
-{
- struct tgsi_parse_context parse;
- unsigned ok = 1;
-
- DUMP_START();
-
- func->csr = func->store;
-
- emit_mov(
- func,
- get_input_base(),
- get_argument( 0 ) );
- emit_mov(
- func,
- get_output_base(),
- get_argument( 1 ) );
- emit_mov(
- func,
- get_const_base(),
- get_argument( 2 ) );
- emit_mov(
- func,
- get_temp_base(),
- get_argument( 3 ) );
-
- tgsi_parse_init( &parse, tokens );
-
- while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
- tgsi_parse_token( &parse );
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- ok = emit_instruction(
- func,
- &parse.FullToken.FullInstruction );
-
- if (!ok) {
- debug_printf("failed to translate tgsi opcode %d to SSE\n",
- parse.FullToken.FullInstruction.Instruction.Opcode );
- }
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* XXX implement this */
- ok = 0;
- debug_printf("failed to emit immediate value to SSE\n");
- break;
-
- default:
- assert( 0 );
- ok = 0;
- break;
- }
+static void aos_to_soa( struct x86_function *func,
+ uint arg_aos,
+ uint arg_soa,
+ uint arg_num,
+ uint arg_stride )
+{
+ struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
+ struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
+ struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
+ struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
+ int inner_loop;
+
+
+ /* Save EBX */
+ x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+
+ x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) );
+ x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) );
+ x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
+ x86_mov( func, stride, x86_fn_arg( func, arg_stride ) );
+
+ /* do */
+ inner_loop = x86_get_label( func );
+ {
+ x86_push( func, aos_input );
+ sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+ sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+ x86_add( func, aos_input, stride );
+ sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+ sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+ x86_add( func, aos_input, stride );
+ sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+ sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+ x86_add( func, aos_input, stride );
+ sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+ sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+ x86_pop( func, aos_input );
+
+ sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+ sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+ sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
+ sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
+ sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
+ sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
+
+ sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
+ sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
+ sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
+ sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
+
+ /* Advance to next input */
+ x86_lea( func, aos_input, x86_make_disp(aos_input, 16) );
+ x86_lea( func, soa_input, x86_make_disp(soa_input, 64) );
}
+ /* while --num_inputs */
+ x86_dec( func, num_inputs );
+ x86_jcc( func, cc_NE, inner_loop );
+
+ /* Restore EBX */
+ x86_pop( func, aos_input );
+}
+
+static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+{
+ struct x86_reg soa_output;
+ struct x86_reg aos_output;
+ struct x86_reg num_outputs;
+ struct x86_reg temp;
+ int inner_loop;
+
+ soa_output = x86_make_reg( file_REG32, reg_AX );
+ aos_output = x86_make_reg( file_REG32, reg_BX );
+ num_outputs = x86_make_reg( file_REG32, reg_CX );
+ temp = x86_make_reg( file_REG32, reg_DX );
+
+ /* Save EBX */
+ x86_push( func, aos_output );
+
+ x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
+ x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
+ x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
+
+ /* do */
+ inner_loop = x86_get_label( func );
+ {
+ sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
+ sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
+ sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
+ sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
+
+ sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+ sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+ sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
+ sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
+ sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
+ sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
+
+ x86_mov( func, temp, x86_fn_arg( func, stride ) );
+ x86_push( func, aos_output );
+ sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+ sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+ x86_add( func, aos_output, temp );
+ sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+ sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+ x86_add( func, aos_output, temp );
+ sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+ sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+ x86_add( func, aos_output, temp );
+ sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+ sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+ x86_pop( func, aos_output );
+
+ /* Advance to next output */
+ x86_lea( func, aos_output, x86_make_disp(aos_output, 16) );
+ x86_lea( func, soa_output, x86_make_disp(soa_output, 64) );
+ }
+ /* while --num_outputs */
+ x86_dec( func, num_outputs );
+ x86_jcc( func, cc_NE, inner_loop );
- tgsi_parse_free( &parse );
-
- DUMP_END();
-
- return ok;
+ /* Restore EBX */
+ x86_pop( func, aos_output );
}
/**
- * Fragment shaders are responsible for interpolating shader inputs. Because on
- * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
- * output, const, temp and coef), the code is split into two phases --
- * DECLARATION and INSTRUCTION phase.
- * GP register holding the output argument is aliased with the coeff argument,
- * as outputs are not needed in the DECLARATION phase.
+ * Translate a TGSI vertex/fragment shader to SSE2 code.
+ * Slightly different things are done for vertex vs. fragment shaders.
+ *
+ * Note that fragment shaders are responsible for interpolating shader
+ * inputs. Because on x86 we have only 4 GP registers, and here we
+ * have 5 shader arguments (input, output, const, temp and coef), the
+ * code is split into two phases -- DECLARATION and INSTRUCTION phase.
+ * GP register holding the output argument is aliased with the coeff
+ * argument, as outputs are not needed in the DECLARATION phase.
+ *
+ * \param tokens the TGSI input shader
+ * \param func the output SSE code/function
+ * \param immediates buffer to place immediates, later passed to SSE func
+ * \param return 1 for success, 0 if translation failed
*/
unsigned
-tgsi_emit_sse2_fs(
- struct tgsi_token *tokens,
- struct x86_function *func )
+tgsi_emit_sse2(
+ const struct tgsi_token *tokens,
+ struct x86_function *func,
+ float (*immediates)[4],
+ boolean do_swizzles )
{
struct tgsi_parse_context parse;
boolean instruction_phase = FALSE;
unsigned ok = 1;
-
- DUMP_START();
+ uint num_immediates = 0;
func->csr = func->store;
- /* DECLARATION phase, do not load output argument. */
- emit_mov(
- func,
- get_input_base(),
- get_argument( 0 ) );
- emit_mov(
- func,
- get_const_base(),
- get_argument( 2 ) );
- emit_mov(
+ tgsi_parse_init( &parse, tokens );
+
+ /* Can't just use EDI, EBX without save/restoring them:
+ */
+ x86_push(
func,
- get_temp_base(),
- get_argument( 3 ) );
- emit_mov(
+ get_immediate_base() );
+
+ x86_push(
func,
- get_coef_base(),
- get_argument( 4 ) );
+ get_temp_base() );
- tgsi_parse_init( &parse, tokens );
+
+ /*
+ * Different function args for vertex/fragment shaders:
+ */
+ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+ /* DECLARATION phase, do not load output argument. */
+ x86_mov(
+ func,
+ get_input_base(),
+ x86_fn_arg( func, 1 ) );
+ /* skipping outputs argument here */
+ x86_mov(
+ func,
+ get_const_base(),
+ x86_fn_arg( func, 3 ) );
+ x86_mov(
+ func,
+ get_temp_base(),
+ x86_fn_arg( func, 4 ) );
+ x86_mov(
+ func,
+ get_coef_base(),
+ x86_fn_arg( func, 5 ) );
+ x86_mov(
+ func,
+ get_immediate_base(),
+ x86_fn_arg( func, 6 ) );
+ }
+ else {
+ assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
+
+ if (do_swizzles)
+ aos_to_soa( func,
+ 6, /* aos_input */
+ 1, /* machine->input */
+ 7, /* num_inputs */
+ 8 ); /* input_stride */
+
+ x86_mov(
+ func,
+ get_input_base(),
+ x86_fn_arg( func, 1 ) );
+ x86_mov(
+ func,
+ get_output_base(),
+ x86_fn_arg( func, 2 ) );
+ x86_mov(
+ func,
+ get_const_base(),
+ x86_fn_arg( func, 3 ) );
+ x86_mov(
+ func,
+ get_temp_base(),
+ x86_fn_arg( func, 4 ) );
+ x86_mov(
+ func,
+ get_immediate_base(),
+ x86_fn_arg( func, 5 ) );
+ }
while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
- emit_declaration(
- func,
- &parse.FullToken.FullDeclaration );
+ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+ emit_declaration(
+ func,
+ &parse.FullToken.FullDeclaration );
+ }
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- if( !instruction_phase ) {
- /* INSTRUCTION phase, overwrite coeff with output. */
- instruction_phase = TRUE;
- emit_mov(
- func,
- get_output_base(),
- get_argument( 1 ) );
+ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+ if( !instruction_phase ) {
+ /* INSTRUCTION phase, overwrite coeff with output. */
+ instruction_phase = TRUE;
+ x86_mov(
+ func,
+ get_output_base(),
+ x86_fn_arg( func, 2 ) );
+ }
}
+
ok = emit_instruction(
func,
&parse.FullToken.FullInstruction );
if (!ok) {
- debug_printf("failed to translate tgsi opcode %d to SSE\n",
- parse.FullToken.FullInstruction.Instruction.Opcode );
+ debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n",
+ parse.FullToken.FullInstruction.Instruction.Opcode,
+ parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
+ "vertex shader" : "fragment shader");
}
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* XXX implement this */
- ok = 0;
- debug_printf("failed to emit immediate value to SSE\n");
+ /* simply copy the immediate values into the next immediates[] slot */
+ {
+ const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+ uint i;
+ assert(size <= 4);
+ assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
+ for( i = 0; i < size; i++ ) {
+ immediates[num_immediates][i] =
+ parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ }
+#if 0
+ debug_printf("SSE FS immediate[%d] = %f %f %f %f\n",
+ num_immediates,
+ immediates[num_immediates][0],
+ immediates[num_immediates][1],
+ immediates[num_immediates][2],
+ immediates[num_immediates][3]);
+#endif
+ num_immediates++;
+ }
break;
default:
@@ -2387,11 +2268,30 @@ tgsi_emit_sse2_fs(
}
}
- tgsi_parse_free( &parse );
+ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
+ if (do_swizzles)
+ soa_to_aos( func, 9, 2, 10, 11 );
+ }
- DUMP_END();
+ /* Can't just use EBX, EDI without save/restoring them:
+ */
+ x86_pop(
+ func,
+ get_temp_base() );
+
+ x86_pop(
+ func,
+ get_immediate_base() );
+
+#ifdef WIN32
+ emit_retw( func, 16 );
+#else
+ emit_ret( func );
+#endif
+
+ tgsi_parse_free( &parse );
return ok;
}
-#endif /* i386 */
+#endif /* PIPE_ARCH_X86 */
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
index 63b8ef3911..e66d115283 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -10,13 +10,10 @@ struct x86_function;
unsigned
tgsi_emit_sse2(
- struct tgsi_token *tokens,
- struct x86_function *function );
-
-unsigned
-tgsi_emit_sse2_fs(
- struct tgsi_token *tokens,
- struct x86_function *function );
+ const struct tgsi_token *tokens,
+ struct x86_function *function,
+ float (*immediates)[4],
+ boolean do_swizzles );
#if defined __cplusplus
}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 7d292778ad..4c65ffd780 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -25,205 +25,36 @@
*
**************************************************************************/
-#include <stdio.h>
-
#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
+#include "util/u_string.h"
#include "tgsi_dump.h"
#include "tgsi_parse.h"
#include "tgsi_build.h"
-struct gen_dump
-{
- unsigned tabs;
- void (* write)(
- struct gen_dump *dump,
- const void *data,
- unsigned size );
-};
-
-struct text_dump
-{
- struct gen_dump base;
- char *text;
- unsigned length;
- unsigned capacity;
-};
-
-static void
-_text_dump_write(
- struct gen_dump *dump,
- const void *data,
- unsigned size )
-{
- struct text_dump *td = (struct text_dump *) dump;
- unsigned new_length = td->length + size;
-
- if( new_length >= td->capacity ) {
- unsigned new_capacity = td->capacity;
-
- do {
- if( new_capacity == 0 ) {
- new_capacity = 256;
- }
- else {
- new_capacity *= 2;
- }
- } while( new_length >= new_capacity );
- td->text = (char *) REALLOC(
- td->text,
- td->capacity,
- new_capacity );
- td->capacity = new_capacity;
- }
- memcpy(
- &td->text[td->length],
- data,
- size );
- td->length = new_length;
- td->text[td->length] = '\0';
-}
-
-struct file_dump
-{
- struct gen_dump base;
- FILE *file;
-};
-
-static void
-_file_dump_write(
- struct gen_dump *dump,
- const void *data,
- unsigned size )
-{
- struct file_dump *fd = (struct file_dump *) dump;
-
-#if 0
- fwrite( data, 1, size, fd->file );
-#else
- {
- unsigned i;
-
- for (i = 0; i < size; i++ ) {
- fprintf( fd->file, "%c", ((const char *) data)[i] );
- }
- }
-#endif
-}
-
-static void
-gen_dump_str(
- struct gen_dump *dump,
- const char *str )
-{
- unsigned i;
- size_t len = strlen( str );
-
- for (i = 0; i < len; i++) {
- dump->write( dump, &str[i], 1 );
- if (str[i] == '\n') {
- unsigned i;
-
- for (i = 0; i < dump->tabs; i++) {
- dump->write( dump, " ", 4 );
- }
- }
- }
-}
-
static void
-gen_dump_chr(
- struct gen_dump *dump,
- const char chr )
-{
- dump->write( dump, &chr, 1 );
-}
-
-static void
-gen_dump_uix(
- struct gen_dump *dump,
- const unsigned ui )
-{
- char str[36];
-
- sprintf( str, "0x%x", ui );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_uid(
- struct gen_dump *dump,
- const unsigned ui )
-{
- char str[16];
-
- sprintf( str, "%u", ui );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_sid(
- struct gen_dump *dump,
- const int si )
-{
- char str[16];
-
- sprintf( str, "%d", si );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_flt(
- struct gen_dump *dump,
- const float flt )
-{
- char str[48];
-
- sprintf( str, "%10.4f", flt );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_enum(
- struct gen_dump *dump,
+dump_enum(
const unsigned e,
const char **enums,
const unsigned enums_count )
{
if (e >= enums_count) {
- gen_dump_uid( dump, e );
+ debug_printf( "%u", e );
}
else {
- gen_dump_str( dump, enums[e] );
+ debug_printf( "%s", enums[e] );
}
}
-static void
-gen_dump_tab(
- struct gen_dump *dump )
-{
- ++dump->tabs;
-}
-
-static void
-gen_dump_untab(
- struct gen_dump *dump )
-{
- assert( dump->tabs > 0 );
-
- --dump->tabs;
-}
-
-#define TXT(S) gen_dump_str( dump, S )
-#define CHR(C) gen_dump_chr( dump, C )
-#define UIX(I) gen_dump_uix( dump, I )
-#define UID(I) gen_dump_uid( dump, I )
-#define SID(I) gen_dump_sid( dump, I )
-#define FLT(F) gen_dump_flt( dump, F )
-#define TAB() gen_dump_tab( dump )
-#define UNT() gen_dump_untab( dump )
-#define ENM(E,ENUMS) gen_dump_enum( dump, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+#define EOL() debug_printf( "\n" )
+#define TXT(S) debug_printf( "%s", S )
+#define CHR(C) debug_printf( "%c", C )
+#define UIX(I) debug_printf( "0x%x", I )
+#define UID(I) debug_printf( "%u", I )
+#define SID(I) debug_printf( "%d", I )
+#define FLT(F) debug_printf( "%10.4f", F )
+#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
static const char *TGSI_PROCESSOR_TYPES[] =
{
@@ -710,7 +541,6 @@ static const char *TGSI_MODULATES[] =
static void
dump_declaration_short(
- struct gen_dump *dump,
struct tgsi_full_declaration *decl )
{
TXT( "\nDCL " );
@@ -746,23 +576,24 @@ dump_declaration_short(
}
}
- if( decl->Declaration.Interpolate ) {
+ if (decl->Declaration.Semantic) {
TXT( ", " );
- ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT );
+ ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT );
+ if (decl->Semantic.SemanticIndex != 0) {
+ CHR( '[' );
+ UID( decl->Semantic.SemanticIndex );
+ CHR( ']' );
+ }
}
- if( decl->Declaration.Semantic ) {
+ if (decl->Declaration.Interpolate) {
TXT( ", " );
- ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT );
- CHR( '[' );
- UID( decl->Semantic.SemanticIndex );
- CHR( ']' );
+ ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT );
}
}
static void
dump_declaration_verbose(
- struct gen_dump *dump,
struct tgsi_full_declaration *decl,
unsigned ignored,
unsigned deflt,
@@ -800,7 +631,7 @@ dump_declaration_verbose(
UIX( decl->Declaration.Padding );
}
- CHR( '\n' );
+ EOL();
switch( decl->Declaration.Declare ) {
case TGSI_DECLARE_RANGE:
TXT( "\nFirst: " );
@@ -819,7 +650,7 @@ dump_declaration_verbose(
}
if( decl->Declaration.Interpolate ) {
- CHR( '\n' );
+ EOL();
TXT( "\nInterpolate: " );
ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES );
if( ignored ) {
@@ -829,7 +660,7 @@ dump_declaration_verbose(
}
if( decl->Declaration.Semantic ) {
- CHR( '\n' );
+ EOL();
TXT( "\nSemanticName : " );
ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
TXT( "\nSemanticIndex: " );
@@ -843,7 +674,6 @@ dump_declaration_verbose(
static void
dump_immediate_short(
- struct gen_dump *dump,
struct tgsi_full_immediate *imm )
{
unsigned i;
@@ -871,7 +701,6 @@ dump_immediate_short(
static void
dump_immediate_verbose(
- struct gen_dump *dump,
struct tgsi_full_immediate *imm,
unsigned ignored )
{
@@ -885,7 +714,7 @@ dump_immediate_verbose(
}
for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
- CHR( '\n' );
+ EOL();
switch( imm->Immediate.DataType ) {
case TGSI_IMM_FLOAT32:
TXT( "\nFloat: " );
@@ -900,14 +729,13 @@ dump_immediate_verbose(
static void
dump_instruction_short(
- struct gen_dump *dump,
struct tgsi_full_instruction *inst,
unsigned instno )
{
unsigned i;
boolean first_reg = TRUE;
- CHR( '\n' );
+ EOL();
UID( instno );
CHR( ':' );
ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT );
@@ -939,6 +767,31 @@ dump_instruction_short(
SID( dst->DstRegister.Index );
CHR( ']' );
+ switch (dst->DstRegisterExtModulate.Modulate) {
+ case TGSI_MODULATE_1X:
+ break;
+ case TGSI_MODULATE_2X:
+ TXT( "_2X" );
+ break;
+ case TGSI_MODULATE_4X:
+ TXT( "_4X" );
+ break;
+ case TGSI_MODULATE_8X:
+ TXT( "_8X" );
+ break;
+ case TGSI_MODULATE_HALF:
+ TXT( "_D2" );
+ break;
+ case TGSI_MODULATE_QUARTER:
+ TXT( "_D4" );
+ break;
+ case TGSI_MODULATE_EIGHTH:
+ TXT( "_D8" );
+ break;
+ default:
+ assert( 0 );
+ }
+
if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) {
CHR( '.' );
if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) {
@@ -966,18 +819,18 @@ dump_instruction_short(
}
CHR( ' ' );
- if( src->SrcRegisterExtMod.Complement ) {
- TXT( "(1 - " );
- }
- if( src->SrcRegisterExtMod.Negate ) {
- CHR( '-' );
- }
- if( src->SrcRegisterExtMod.Absolute ) {
+ if (src->SrcRegisterExtMod.Negate)
+ TXT( "-(" );
+ if (src->SrcRegisterExtMod.Absolute)
CHR( '|' );
- }
- if( src->SrcRegister.Negate ) {
+ if (src->SrcRegisterExtMod.Scale2X)
+ TXT( "2*(" );
+ if (src->SrcRegisterExtMod.Bias)
+ CHR( '(' );
+ if (src->SrcRegisterExtMod.Complement)
+ TXT( "1-(" );
+ if (src->SrcRegister.Negate)
CHR( '-' );
- }
ENM( src->SrcRegister.File, TGSI_FILES_SHORT );
@@ -985,35 +838,37 @@ dump_instruction_short(
SID( src->SrcRegister.Index );
CHR( ']' );
- if (src->SrcRegister.Extended) {
- if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
- src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
- src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
- src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
- CHR( '.' );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT );
- }
- }
- else if( src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
- src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
- src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
- src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ) {
+ if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
+ src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
+ src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
+ src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) {
CHR( '.' );
ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT );
ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT );
ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT );
ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT );
}
+ if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
+ src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
+ src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
+ src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
+ CHR( '.' );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT );
+ }
- if( src->SrcRegisterExtMod.Absolute ) {
+ if (src->SrcRegisterExtMod.Complement)
+ CHR( ')' );
+ if (src->SrcRegisterExtMod.Bias)
+ TXT( ")-.5" );
+ if (src->SrcRegisterExtMod.Scale2X)
+ CHR( ')' );
+ if (src->SrcRegisterExtMod.Absolute)
CHR( '|' );
- }
- if( src->SrcRegisterExtMod.Complement ) {
+ if (src->SrcRegisterExtMod.Negate)
CHR( ')' );
- }
first_reg = FALSE;
}
@@ -1037,7 +892,6 @@ dump_instruction_short(
static void
dump_instruction_verbose(
- struct gen_dump *dump,
struct tgsi_full_instruction *inst,
unsigned ignored,
unsigned deflt,
@@ -1065,7 +919,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
@@ -1119,7 +973,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
@@ -1137,7 +991,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
@@ -1158,7 +1012,7 @@ dump_instruction_verbose(
struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
- CHR( '\n' );
+ EOL();
TXT( "\nFile : " );
ENM( dst->DstRegister.File, TGSI_FILES );
if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
@@ -1189,7 +1043,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
@@ -1227,7 +1081,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
@@ -1249,7 +1103,7 @@ dump_instruction_verbose(
struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
- CHR( '\n' );
+ EOL();
TXT( "\nFile : ");
ENM( src->SrcRegister.File, TGSI_FILES );
if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
@@ -1294,7 +1148,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
@@ -1340,7 +1194,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
@@ -1375,9 +1229,8 @@ dump_instruction_verbose(
}
}
-static void
-dump_gen(
- struct gen_dump *dump,
+void
+tgsi_dump(
const struct tgsi_token *tokens,
unsigned flags )
{
@@ -1389,18 +1242,17 @@ dump_gen(
unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT);
unsigned instno = 0;
- dump->tabs = 0;
-
- /* sanity check */
+ /* sanity checks */
assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
+ assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
+ assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
tgsi_parse_init( &parse, tokens );
TXT( "tgsi-dump begin -----------------" );
- CHR( '\n' );
+ EOL();
ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT );
- CHR( ' ' );
UID( parse.FullVersion.Version.MajorVersion );
CHR( '.' );
UID( parse.FullVersion.Version.MinorVersion );
@@ -1410,7 +1262,7 @@ dump_gen(
UID( parse.FullVersion.Version.MajorVersion );
TXT( "\nMinorVersion: " );
UID( parse.FullVersion.Version.MinorVersion );
- CHR( '\n' );
+ EOL();
TXT( "\nHeaderSize: " );
UID( parse.FullHeader.Header.HeaderSize );
@@ -1418,7 +1270,7 @@ dump_gen(
UID( parse.FullHeader.Header.BodySize );
TXT( "\nProcessor : " );
ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
- CHR( '\n' );
+ EOL();
}
fi = tgsi_default_full_instruction();
@@ -1430,19 +1282,16 @@ dump_gen(
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
dump_declaration_short(
- dump,
&parse.FullToken.FullDeclaration );
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
dump_immediate_short(
- dump,
&parse.FullToken.FullImmediate );
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
dump_instruction_short(
- dump,
&parse.FullToken.FullInstruction,
instno );
instno++;
@@ -1467,7 +1316,6 @@ dump_gen(
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
dump_declaration_verbose(
- dump,
&parse.FullToken.FullDeclaration,
ignored,
deflt,
@@ -1476,14 +1324,12 @@ dump_gen(
case TGSI_TOKEN_TYPE_IMMEDIATE:
dump_immediate_verbose(
- dump,
&parse.FullToken.FullImmediate,
ignored );
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
dump_instruction_verbose(
- dump,
&parse.FullToken.FullInstruction,
ignored,
deflt,
@@ -1494,7 +1340,7 @@ dump_gen(
assert( 0 );
}
- CHR( '\n' );
+ EOL();
}
}
@@ -1502,86 +1348,3 @@ dump_gen(
tgsi_parse_free( &parse );
}
-
-
-static void
-sanity_checks(void)
-{
- assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
- assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
-}
-
-
-void
-tgsi_dump(
- const struct tgsi_token *tokens,
- unsigned flags )
-{
- struct file_dump dump;
-
- sanity_checks();
-
- dump.base.write = _file_dump_write;
-#if 0
- {
- static unsigned counter = 0;
- char buffer[64];
- sprintf( buffer, "tgsi-dump-%.4u.txt", counter++ );
- dump.file = fopen( buffer, "wt" );
- }
-#else
- dump.file = stderr;
-#endif
-
- dump_gen(
- &dump.base,
- tokens,
- flags );
-
-#if 0
- fclose( dump.file );
-#endif
-}
-
-void
-tgsi_dump_str(
- char **str,
- const struct tgsi_token *tokens,
- unsigned flags )
-{
- struct text_dump dump;
-
- dump.base.write = _text_dump_write;
- dump.text = NULL;
- dump.length = 0;
- dump.capacity = 0;
-
- dump_gen(
- &dump.base,
- tokens,
- flags );
-
- *str = dump.text;
-}
-
-
-void tgsi_debug_dump( struct tgsi_token *tokens )
-{
- char *str, *p;
-
- tgsi_dump_str( &str, tokens, 0 );
-
- p = str;
- while (p != NULL)
- {
- char *end = strchr( p, '\n' );
- if (end != NULL)
- {
- *end++ = '\0';
- }
- debug_printf( "%s\n", p );
- p = end;
- }
-
- FREE( str );
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index 51d79a0362..beb0155d56 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -14,16 +14,6 @@ tgsi_dump(
const struct tgsi_token *tokens,
unsigned flags );
-void
-tgsi_dump_str(
- char **str,
- const struct tgsi_token *tokens,
- unsigned flags );
-
-/* Dump to debug_printf()
- */
-void tgsi_debug_dump( struct tgsi_token *tokens );
-
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
index c3526cb71f..5c0b0bfd61 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
@@ -43,7 +43,7 @@ tgsi_full_token_free(
union tgsi_full_token *full_token )
{
if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
- FREE( full_token->FullImmediate.u.Pointer );
+ FREE( (void *) full_token->FullImmediate.u.Pointer );
}
}
@@ -156,7 +156,7 @@ tgsi_parse_token(
imm->u.Pointer = MALLOC(
sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
- next_token( ctx, &imm->u.ImmediateFloat32[i] );
+ next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] );
}
break;
@@ -330,3 +330,18 @@ tgsi_num_tokens(const struct tgsi_token *tokens)
}
return 0;
}
+
+
+/**
+ * Make a new copy of a token array.
+ */
+struct tgsi_token *
+tgsi_dup_tokens(const struct tgsi_token *tokens)
+{
+ unsigned n = tgsi_num_tokens(tokens);
+ unsigned bytes = n * sizeof(struct tgsi_token);
+ struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes);
+ if (new_tokens)
+ memcpy(new_tokens, tokens, bytes);
+ return new_tokens;
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index a98e88e343..4102101093 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -1,6 +1,35 @@
-#if !defined TGSI_PARSE_H
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TGSI_PARSE_H
#define TGSI_PARSE_H
+#include "pipe/p_shader_tokens.h"
+
#if defined __cplusplus
extern "C" {
#endif
@@ -50,8 +79,8 @@ struct tgsi_full_immediate
struct tgsi_immediate Immediate;
union
{
- void *Pointer;
- struct tgsi_immediate_float32 *ImmediateFloat32;
+ const void *Pointer;
+ const struct tgsi_immediate_float32 *ImmediateFloat32;
} u;
};
@@ -116,6 +145,8 @@ tgsi_parse_token(
unsigned
tgsi_num_tokens(const struct tgsi_token *tokens);
+struct tgsi_token *
+tgsi_dup_tokens(const struct tgsi_token *tokens);
#if defined __cplusplus
}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index ea4a72967d..65650ed22a 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -103,18 +103,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->file_max[file] = MAX2(info->file_max[file], (int)i);
if (file == TGSI_FILE_INPUT) {
- info->input_semantic_name[info->num_inputs]
- = (ubyte)fulldecl->Semantic.SemanticName;
- info->input_semantic_index[info->num_inputs]
- = (ubyte)fulldecl->Semantic.SemanticIndex;
+ info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
+ info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
info->num_inputs++;
}
if (file == TGSI_FILE_OUTPUT) {
- info->output_semantic_name[info->num_outputs]
- = (ubyte)fulldecl->Semantic.SemanticName;
- info->output_semantic_index[info->num_outputs]
- = (ubyte)fulldecl->Semantic.SemanticIndex;
+ info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
+ info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
info->num_outputs++;
}
@@ -137,6 +133,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}
}
+ assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs );
+ assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs );
+
info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
info->opcode_count[TGSI_OPCODE_KILP]);
diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile
new file mode 100644
index 0000000000..ad2a5b705e
--- /dev/null
+++ b/src/gallium/auxiliary/translate/Makefile
@@ -0,0 +1,15 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = translate
+
+C_SOURCES = \
+ translate_generic.c \
+ translate_sse.c \
+ translate.c \
+ translate_cache.c
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript
new file mode 100644
index 0000000000..9553a67537
--- /dev/null
+++ b/src/gallium/auxiliary/translate/SConscript
@@ -0,0 +1,12 @@
+Import('*')
+
+translate = env.ConvenienceLibrary(
+ target = 'translate',
+ source = [
+ 'translate_generic.c',
+ 'translate_sse.c',
+ 'translate.c',
+ 'translate_cache.c',
+ ])
+
+auxiliaries.insert(0, translate)
diff --git a/src/gallium/auxiliary/draw/draw_debug.c b/src/gallium/auxiliary/translate/translate.c
index d6220b5f62..b04bc6eefd 100644
--- a/src/gallium/auxiliary/draw/draw_debug.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -30,84 +30,19 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "draw_private.h"
-#include "draw_context.h"
+#include "pipe/p_util.h"
+#include "pipe/p_state.h"
+#include "translate.h"
-
-
-static void
-draw_prim_info(unsigned prim, unsigned *first, unsigned *incr)
-{
- assert(prim >= PIPE_PRIM_POINTS);
- assert(prim <= PIPE_PRIM_POLYGON);
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- *first = 1;
- *incr = 1;
- break;
- case PIPE_PRIM_LINES:
- *first = 2;
- *incr = 2;
- break;
- case PIPE_PRIM_LINE_STRIP:
- *first = 2;
- *incr = 1;
- break;
- case PIPE_PRIM_LINE_LOOP:
- *first = 2;
- *incr = 1;
- break;
- case PIPE_PRIM_TRIANGLES:
- *first = 3;
- *incr = 3;
- break;
- case PIPE_PRIM_TRIANGLE_STRIP:
- *first = 3;
- *incr = 1;
- break;
- case PIPE_PRIM_TRIANGLE_FAN:
- case PIPE_PRIM_POLYGON:
- *first = 3;
- *incr = 1;
- break;
- case PIPE_PRIM_QUADS:
- *first = 4;
- *incr = 4;
- break;
- case PIPE_PRIM_QUAD_STRIP:
- *first = 4;
- *incr = 2;
- break;
- default:
- assert(0);
- *first = 1;
- *incr = 1;
- break;
- }
-}
-
-
-unsigned
-draw_trim_prim( unsigned mode, unsigned count )
+struct translate *translate_create( const struct translate_key *key )
{
- unsigned length, first, incr;
+ struct translate *translate = NULL;
- draw_prim_info( mode, &first, &incr );
+#if defined(__i386__) || defined(__386__) || defined(i386)
+ translate = translate_sse2_create( key );
+ if (translate)
+ return translate;
+#endif
- if (count < first)
- length = 0;
- else
- length = count - (count - first) % incr;
-
- return length;
-}
-
-
-boolean
-draw_validate_prim( unsigned mode, unsigned count )
-{
- return (count > 0 &&
- count == draw_trim_prim( mode, count ));
+ return translate_generic_create( key );
}
-
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
new file mode 100644
index 0000000000..b8210af50c
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2008 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * Vertex fetch/store/convert code. This functionality is used in two places:
+ * 1. Vertex fetch/convert - to grab vertex data from incoming vertex
+ * arrays and convert to format needed by vertex shaders.
+ * 2. Vertex store/emit - to convert simple float[][4] vertex attributes
+ * (which is the organization used throughout the draw/prim pipeline) to
+ * hardware-specific formats and emit into hardware vertex buffers.
+ *
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+#ifndef _TRANSLATE_H
+#define _TRANSLATE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+
+struct translate_element
+{
+ enum pipe_format input_format;
+ enum pipe_format output_format;
+ unsigned input_buffer;
+ unsigned input_offset; /* can't really reduce the size of these */
+ unsigned output_offset;
+};
+
+
+struct translate_key {
+ unsigned output_stride;
+ unsigned nr_elements;
+ struct translate_element element[PIPE_MAX_ATTRIBS];
+};
+
+
+struct translate {
+ struct translate_key key;
+
+ void (*release)( struct translate * );
+
+ void (*set_buffer)( struct translate *,
+ unsigned i,
+ const void *ptr,
+ unsigned stride );
+
+ void (*run_elts)( struct translate *,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer);
+
+ void (*run)( struct translate *,
+ unsigned start,
+ unsigned count,
+ void *output_buffer);
+};
+
+
+
+#if 0
+struct translate_context *translate_context_create( void );
+void translate_context_destroy( struct translate_context * );
+
+struct translate *translate_lookup_or_create( struct translate_context *tctx,
+ const struct translate_key *key );
+#endif
+
+
+struct translate *translate_create( const struct translate_key *key );
+
+static INLINE int translate_keysize( const struct translate_key *key )
+{
+ return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
+}
+
+static INLINE int translate_key_compare( const struct translate_key *a,
+ const struct translate_key *b )
+{
+ int keysize = translate_keysize(a);
+ return memcmp(a, b, keysize);
+}
+
+
+static INLINE void translate_key_sanitize( struct translate_key *a )
+{
+ int keysize = translate_keysize(a);
+ char *ptr = (char *)a;
+ memset(ptr + keysize, 0, sizeof(*a) - keysize);
+}
+
+
+/*******************************************************************************
+ * Private:
+ */
+struct translate *translate_sse2_create( const struct translate_key *key );
+
+struct translate *translate_generic_create( const struct translate_key *key );
+
+
+#endif
diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c
new file mode 100644
index 0000000000..115dc9287e
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate_cache.c
@@ -0,0 +1,102 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "pipe/p_state.h"
+#include "translate.h"
+#include "translate_cache.h"
+
+#include "cso_cache/cso_cache.h"
+#include "cso_cache/cso_hash.h"
+
+struct translate_cache {
+ struct cso_hash *hash;
+};
+
+struct translate_cache * translate_cache_create( void )
+{
+ struct translate_cache *cache = MALLOC_STRUCT(translate_cache);
+ cache->hash = cso_hash_create();
+ return cache;
+}
+
+
+static INLINE void delete_translates(struct translate_cache *cache)
+{
+ struct cso_hash *hash = cache->hash;
+ struct cso_hash_iter iter = cso_hash_first_node(hash);
+ while (!cso_hash_iter_is_null(iter)) {
+ struct translate *state = (struct translate*)cso_hash_iter_data(iter);
+ iter = cso_hash_iter_next(iter);
+ if (state) {
+ state->release(state);
+ }
+ }
+}
+
+void translate_cache_destroy(struct translate_cache *cache)
+{
+ delete_translates(cache);
+ cso_hash_delete(cache->hash);
+ FREE(cache);
+}
+
+
+static INLINE unsigned translate_hash_key_size(struct translate_key *key)
+{
+ unsigned size = sizeof(struct translate_key) -
+ sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements);
+ return size;
+}
+
+static INLINE unsigned create_key(struct translate_key *key)
+{
+ unsigned hash_key;
+ unsigned size = translate_hash_key_size(key);
+ /*debug_printf("key size = %d, (els = %d)\n",
+ size, key->nr_elements);*/
+ hash_key = cso_construct_key(key, size);
+ return hash_key;
+}
+
+struct translate * translate_cache_find(struct translate_cache *cache,
+ struct translate_key *key)
+{
+ unsigned hash_key = create_key(key);
+ struct translate *translate = (struct translate*)
+ cso_hash_find_data_from_template(cache->hash,
+ hash_key,
+ key, sizeof(*key));
+
+ if (!translate) {
+ /* create/insert */
+ translate = translate_create(key);
+ cso_hash_insert(cache->hash, hash_key, translate);
+ }
+
+ return translate;
+}
diff --git a/src/gallium/auxiliary/translate/translate_cache.h b/src/gallium/auxiliary/translate/translate_cache.h
new file mode 100644
index 0000000000..7dba871e57
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate_cache.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2008 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _TRANSLATE_CACHE_H
+#define _TRANSLATE_CACHE_H
+
+
+/*******************************************************************************
+ * Translate cache.
+ * Simply used to cache created translates. Avoids unecessary creation of
+ * translate's if one suitable for a given translate_key has already been
+ * created.
+ *
+ * Note: this functionality depends and requires the CSO module.
+ */
+struct translate_cache;
+
+struct translate_key;
+struct translate;
+
+struct translate_cache *translate_cache_create( void );
+void translate_cache_destroy(struct translate_cache *cache);
+
+/**
+ * Will try to find a translate structure matched by the given key.
+ * If such a structure doesn't exist in the cache the function
+ * will automatically create it, insert it in the cache and
+ * return the created version.
+ *
+ */
+struct translate *translate_cache_find(struct translate_cache *cache,
+ struct translate_key *key);
+
+#endif
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
new file mode 100644
index 0000000000..402780ee53
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -0,0 +1,676 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_state.h"
+#include "translate.h"
+
+
+#define DRAW_DBG 0
+
+typedef void (*fetch_func)(const void *ptr, float *attrib);
+typedef void (*emit_func)(const float *attrib, void *ptr);
+
+
+
+struct translate_generic {
+ struct translate translate;
+
+ struct {
+ fetch_func fetch;
+ unsigned buffer;
+ unsigned input_offset;
+
+ emit_func emit;
+ unsigned output_offset;
+
+ char *input_ptr;
+ unsigned input_stride;
+
+ } attrib[PIPE_MAX_ATTRIBS];
+
+ unsigned nr_attrib;
+};
+
+
+static struct translate_generic *translate_generic( struct translate *translate )
+{
+ return (struct translate_generic *)translate;
+}
+
+/**
+ * Fetch a float[4] vertex attribute from memory, doing format/type
+ * conversion as needed.
+ *
+ * This is probably needed/dupliocated elsewhere, eg format
+ * conversion, texture sampling etc.
+ */
+#define ATTRIB( NAME, SZ, TYPE, FROM, TO ) \
+static void \
+fetch_##NAME(const void *ptr, float *attrib) \
+{ \
+ const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f }; \
+ unsigned i; \
+ \
+ for (i = 0; i < SZ; i++) { \
+ attrib[i] = FROM(i); \
+ } \
+ \
+ for (; i < 4; i++) { \
+ attrib[i] = defaults[i]; \
+ } \
+} \
+ \
+static void \
+emit_##NAME(const float *attrib, void *ptr) \
+{ \
+ unsigned i; \
+ TYPE *out = (TYPE *)ptr; \
+ \
+ for (i = 0; i < SZ; i++) { \
+ out[i] = TO(attrib[i]); \
+ } \
+}
+
+
+#define FROM_64_FLOAT(i) ((float) ((double *) ptr)[i])
+#define FROM_32_FLOAT(i) (((float *) ptr)[i])
+
+#define FROM_8_USCALED(i) ((float) ((unsigned char *) ptr)[i])
+#define FROM_16_USCALED(i) ((float) ((unsigned short *) ptr)[i])
+#define FROM_32_USCALED(i) ((float) ((unsigned int *) ptr)[i])
+
+#define FROM_8_SSCALED(i) ((float) ((char *) ptr)[i])
+#define FROM_16_SSCALED(i) ((float) ((short *) ptr)[i])
+#define FROM_32_SSCALED(i) ((float) ((int *) ptr)[i])
+
+#define FROM_8_UNORM(i) ((float) ((unsigned char *) ptr)[i] / 255.0f)
+#define FROM_16_UNORM(i) ((float) ((unsigned short *) ptr)[i] / 65535.0f)
+#define FROM_32_UNORM(i) ((float) ((unsigned int *) ptr)[i] / 4294967295.0f)
+
+#define FROM_8_SNORM(i) ((float) ((char *) ptr)[i] / 127.0f)
+#define FROM_16_SNORM(i) ((float) ((short *) ptr)[i] / 32767.0f)
+#define FROM_32_SNORM(i) ((float) ((int *) ptr)[i] / 2147483647.0f)
+
+#define TO_64_FLOAT(x) ((double) x)
+#define TO_32_FLOAT(x) (x)
+
+#define TO_8_USCALED(x) ((unsigned char) x)
+#define TO_16_USCALED(x) ((unsigned short) x)
+#define TO_32_USCALED(x) ((unsigned int) x)
+
+#define TO_8_SSCALED(x) ((char) x)
+#define TO_16_SSCALED(x) ((short) x)
+#define TO_32_SSCALED(x) ((int) x)
+
+#define TO_8_UNORM(x) ((unsigned char) (x * 255.0f))
+#define TO_16_UNORM(x) ((unsigned short) (x * 65535.0f))
+#define TO_32_UNORM(x) ((unsigned int) (x * 4294967295.0f))
+
+#define TO_8_SNORM(x) ((char) (x * 127.0f))
+#define TO_16_SNORM(x) ((short) (x * 32767.0f))
+#define TO_32_SNORM(x) ((int) (x * 2147483647.0f))
+
+
+
+ATTRIB( R64G64B64A64_FLOAT, 4, double, FROM_64_FLOAT, TO_64_FLOAT )
+ATTRIB( R64G64B64_FLOAT, 3, double, FROM_64_FLOAT, TO_64_FLOAT )
+ATTRIB( R64G64_FLOAT, 2, double, FROM_64_FLOAT, TO_64_FLOAT )
+ATTRIB( R64_FLOAT, 1, double, FROM_64_FLOAT, TO_64_FLOAT )
+
+ATTRIB( R32G32B32A32_FLOAT, 4, float, FROM_32_FLOAT, TO_32_FLOAT )
+ATTRIB( R32G32B32_FLOAT, 3, float, FROM_32_FLOAT, TO_32_FLOAT )
+ATTRIB( R32G32_FLOAT, 2, float, FROM_32_FLOAT, TO_32_FLOAT )
+ATTRIB( R32_FLOAT, 1, float, FROM_32_FLOAT, TO_32_FLOAT )
+
+ATTRIB( R32G32B32A32_USCALED, 4, unsigned, FROM_32_USCALED, TO_32_USCALED )
+ATTRIB( R32G32B32_USCALED, 3, unsigned, FROM_32_USCALED, TO_32_USCALED )
+ATTRIB( R32G32_USCALED, 2, unsigned, FROM_32_USCALED, TO_32_USCALED )
+ATTRIB( R32_USCALED, 1, unsigned, FROM_32_USCALED, TO_32_USCALED )
+
+ATTRIB( R32G32B32A32_SSCALED, 4, int, FROM_32_SSCALED, TO_32_SSCALED )
+ATTRIB( R32G32B32_SSCALED, 3, int, FROM_32_SSCALED, TO_32_SSCALED )
+ATTRIB( R32G32_SSCALED, 2, int, FROM_32_SSCALED, TO_32_SSCALED )
+ATTRIB( R32_SSCALED, 1, int, FROM_32_SSCALED, TO_32_SSCALED )
+
+ATTRIB( R32G32B32A32_UNORM, 4, unsigned, FROM_32_UNORM, TO_32_UNORM )
+ATTRIB( R32G32B32_UNORM, 3, unsigned, FROM_32_UNORM, TO_32_UNORM )
+ATTRIB( R32G32_UNORM, 2, unsigned, FROM_32_UNORM, TO_32_UNORM )
+ATTRIB( R32_UNORM, 1, unsigned, FROM_32_UNORM, TO_32_UNORM )
+
+ATTRIB( R32G32B32A32_SNORM, 4, int, FROM_32_SNORM, TO_32_SNORM )
+ATTRIB( R32G32B32_SNORM, 3, int, FROM_32_SNORM, TO_32_SNORM )
+ATTRIB( R32G32_SNORM, 2, int, FROM_32_SNORM, TO_32_SNORM )
+ATTRIB( R32_SNORM, 1, int, FROM_32_SNORM, TO_32_SNORM )
+
+ATTRIB( R16G16B16A16_USCALED, 4, ushort, FROM_16_USCALED, TO_16_USCALED )
+ATTRIB( R16G16B16_USCALED, 3, ushort, FROM_16_USCALED, TO_16_USCALED )
+ATTRIB( R16G16_USCALED, 2, ushort, FROM_16_USCALED, TO_16_USCALED )
+ATTRIB( R16_USCALED, 1, ushort, FROM_16_USCALED, TO_16_USCALED )
+
+ATTRIB( R16G16B16A16_SSCALED, 4, short, FROM_16_SSCALED, TO_16_SSCALED )
+ATTRIB( R16G16B16_SSCALED, 3, short, FROM_16_SSCALED, TO_16_SSCALED )
+ATTRIB( R16G16_SSCALED, 2, short, FROM_16_SSCALED, TO_16_SSCALED )
+ATTRIB( R16_SSCALED, 1, short, FROM_16_SSCALED, TO_16_SSCALED )
+
+ATTRIB( R16G16B16A16_UNORM, 4, ushort, FROM_16_UNORM, TO_16_UNORM )
+ATTRIB( R16G16B16_UNORM, 3, ushort, FROM_16_UNORM, TO_16_UNORM )
+ATTRIB( R16G16_UNORM, 2, ushort, FROM_16_UNORM, TO_16_UNORM )
+ATTRIB( R16_UNORM, 1, ushort, FROM_16_UNORM, TO_16_UNORM )
+
+ATTRIB( R16G16B16A16_SNORM, 4, short, FROM_16_SNORM, TO_16_SNORM )
+ATTRIB( R16G16B16_SNORM, 3, short, FROM_16_SNORM, TO_16_SNORM )
+ATTRIB( R16G16_SNORM, 2, short, FROM_16_SNORM, TO_16_SNORM )
+ATTRIB( R16_SNORM, 1, short, FROM_16_SNORM, TO_16_SNORM )
+
+ATTRIB( R8G8B8A8_USCALED, 4, ubyte, FROM_8_USCALED, TO_8_USCALED )
+ATTRIB( R8G8B8_USCALED, 3, ubyte, FROM_8_USCALED, TO_8_USCALED )
+ATTRIB( R8G8_USCALED, 2, ubyte, FROM_8_USCALED, TO_8_USCALED )
+ATTRIB( R8_USCALED, 1, ubyte, FROM_8_USCALED, TO_8_USCALED )
+
+ATTRIB( R8G8B8A8_SSCALED, 4, char, FROM_8_SSCALED, TO_8_SSCALED )
+ATTRIB( R8G8B8_SSCALED, 3, char, FROM_8_SSCALED, TO_8_SSCALED )
+ATTRIB( R8G8_SSCALED, 2, char, FROM_8_SSCALED, TO_8_SSCALED )
+ATTRIB( R8_SSCALED, 1, char, FROM_8_SSCALED, TO_8_SSCALED )
+
+ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
+ATTRIB( R8G8B8_UNORM, 3, ubyte, FROM_8_UNORM, TO_8_UNORM )
+ATTRIB( R8G8_UNORM, 2, ubyte, FROM_8_UNORM, TO_8_UNORM )
+ATTRIB( R8_UNORM, 1, ubyte, FROM_8_UNORM, TO_8_UNORM )
+
+ATTRIB( R8G8B8A8_SNORM, 4, char, FROM_8_SNORM, TO_8_SNORM )
+ATTRIB( R8G8B8_SNORM, 3, char, FROM_8_SNORM, TO_8_SNORM )
+ATTRIB( R8G8_SNORM, 2, char, FROM_8_SNORM, TO_8_SNORM )
+ATTRIB( R8_SNORM, 1, char, FROM_8_SNORM, TO_8_SNORM )
+
+ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
+//ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
+
+
+
+static void
+fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib)
+{
+ attrib[2] = FROM_8_UNORM(0);
+ attrib[1] = FROM_8_UNORM(1);
+ attrib[0] = FROM_8_UNORM(2);
+ attrib[3] = FROM_8_UNORM(3);
+}
+
+static void
+emit_B8G8R8A8_UNORM( const float *attrib, void *ptr)
+{
+ ubyte *out = (ubyte *)ptr;
+ out[2] = TO_8_UNORM(attrib[0]);
+ out[1] = TO_8_UNORM(attrib[1]);
+ out[0] = TO_8_UNORM(attrib[2]);
+ out[3] = TO_8_UNORM(attrib[3]);
+}
+
+static void
+fetch_NULL( const void *ptr, float *attrib )
+{
+ attrib[0] = 0;
+ attrib[1] = 0;
+ attrib[2] = 0;
+ attrib[3] = 1;
+}
+
+static void
+emit_NULL( const float *attrib, void *ptr )
+{
+ /* do nothing is the only sensible option */
+}
+
+static fetch_func get_fetch_func( enum pipe_format format )
+{
+ switch (format) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return fetch_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return fetch_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return fetch_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return fetch_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return fetch_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return fetch_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return fetch_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return fetch_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return fetch_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return fetch_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return fetch_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return fetch_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return fetch_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return fetch_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return fetch_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return fetch_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return fetch_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return fetch_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return fetch_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return fetch_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return fetch_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return fetch_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return fetch_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return fetch_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return fetch_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return fetch_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return fetch_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return fetch_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return fetch_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return fetch_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return fetch_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return fetch_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return fetch_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return fetch_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return fetch_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return fetch_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return fetch_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return fetch_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return fetch_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return fetch_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return fetch_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return fetch_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return fetch_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return fetch_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return fetch_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return fetch_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return fetch_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return fetch_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return fetch_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return fetch_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return fetch_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return fetch_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return fetch_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return fetch_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return fetch_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return fetch_R8G8B8A8_SSCALED;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return fetch_A8R8G8B8_UNORM;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return fetch_B8G8R8A8_UNORM;
+
+ default:
+ assert(0);
+ return fetch_NULL;
+ }
+}
+
+
+
+
+static emit_func get_emit_func( enum pipe_format format )
+{
+ switch (format) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return emit_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return emit_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return emit_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return emit_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return emit_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return emit_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return emit_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return emit_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return emit_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return emit_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return emit_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return emit_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return emit_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return emit_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return emit_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return emit_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return emit_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return emit_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return emit_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return emit_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return emit_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return emit_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return emit_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return emit_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return emit_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return emit_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return emit_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return emit_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return emit_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return emit_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return emit_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return emit_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return emit_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return emit_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return emit_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return emit_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return emit_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return emit_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return emit_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return emit_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return emit_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return emit_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return emit_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return emit_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return emit_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return emit_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return emit_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return emit_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return emit_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return emit_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return emit_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return emit_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return emit_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return emit_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return emit_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return emit_R8G8B8A8_SSCALED;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return emit_A8R8G8B8_UNORM;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return emit_B8G8R8A8_UNORM;
+
+ default:
+ assert(0);
+ return emit_NULL;
+ }
+}
+
+
+
+/**
+ * Fetch vertex attributes for 'count' vertices.
+ */
+static void generic_run_elts( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned nr_attrs = tg->nr_attrib;
+ unsigned attr;
+ unsigned i;
+
+ /* loop over vertex attributes (vertex shader inputs)
+ */
+ for (i = 0; i < count; i++) {
+ unsigned elt = *elts++;
+
+ for (attr = 0; attr < nr_attrs; attr++) {
+ float data[4];
+
+ const char *src = (tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * elt);
+
+ char *dst = (vert +
+ tg->attrib[attr].output_offset);
+
+ tg->attrib[attr].fetch( src, data );
+
+ if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
+ i, elt, attr, data[0], data[1], data[2], data[3]);
+
+ tg->attrib[attr].emit( data, dst );
+ }
+
+ vert += tg->translate.key.output_stride;
+ }
+}
+
+
+
+static void generic_run( struct translate *translate,
+ unsigned start,
+ unsigned count,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned nr_attrs = tg->nr_attrib;
+ unsigned attr;
+ unsigned i;
+
+ /* loop over vertex attributes (vertex shader inputs)
+ */
+ for (i = 0; i < count; i++) {
+ unsigned elt = start + i;
+
+ for (attr = 0; attr < nr_attrs; attr++) {
+ float data[4];
+
+ const char *src = (tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * elt);
+
+ char *dst = (vert +
+ tg->attrib[attr].output_offset);
+
+ tg->attrib[attr].fetch( src, data );
+
+ if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
+ i, attr, data[0], data[1], data[2], data[3]);
+
+ tg->attrib[attr].emit( data, dst );
+ }
+
+ vert += tg->translate.key.output_stride;
+ }
+}
+
+
+
+static void generic_set_buffer( struct translate *translate,
+ unsigned buf,
+ const void *ptr,
+ unsigned stride )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ unsigned i;
+
+ for (i = 0; i < tg->nr_attrib; i++) {
+ if (tg->attrib[i].buffer == buf) {
+ tg->attrib[i].input_ptr = ((char *)ptr +
+ tg->attrib[i].input_offset);
+ tg->attrib[i].input_stride = stride;
+ }
+ }
+}
+
+
+static void generic_release( struct translate *translate )
+{
+ /* Refcount?
+ */
+ FREE(translate);
+}
+
+struct translate *translate_generic_create( const struct translate_key *key )
+{
+ struct translate_generic *tg = CALLOC_STRUCT(translate_generic);
+ unsigned i;
+
+ if (tg == NULL)
+ return NULL;
+
+ tg->translate.key = *key;
+ tg->translate.release = generic_release;
+ tg->translate.set_buffer = generic_set_buffer;
+ tg->translate.run_elts = generic_run_elts;
+ tg->translate.run = generic_run;
+
+ for (i = 0; i < key->nr_elements; i++) {
+
+ tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
+ tg->attrib[i].buffer = key->element[i].input_buffer;
+ tg->attrib[i].input_offset = key->element[i].input_offset;
+
+ tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+ tg->attrib[i].output_offset = key->element[i].output_offset;
+
+ }
+
+ tg->nr_attrib = key->nr_elements;
+
+
+ return &tg->translate;
+}
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
new file mode 100644
index 0000000000..a54ac5a82f
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_util.h"
+#include "util/u_simple_list.h"
+
+#include "translate.h"
+
+
+#if defined(__i386__) || defined(__386__) || defined(i386)
+
+#include "rtasm/rtasm_cpu.h"
+#include "rtasm/rtasm_x86sse.h"
+
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+
+#ifdef WIN32
+#define RTASM __cdecl
+#else
+#define RTASM
+#endif
+
+typedef void (RTASM *run_func)( struct translate *translate,
+ unsigned start,
+ unsigned count,
+ void *output_buffer );
+
+typedef void (RTASM *run_elts_func)( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer );
+
+
+
+struct translate_sse {
+ struct translate translate;
+
+ struct x86_function linear_func;
+ struct x86_function elt_func;
+ struct x86_function *func;
+
+ boolean loaded_identity;
+ boolean loaded_255;
+ boolean loaded_inv_255;
+
+ float identity[4];
+ float float_255[4];
+ float inv_255[4];
+
+ struct {
+ char *input_ptr;
+ unsigned input_stride;
+ } attrib[PIPE_MAX_ATTRIBS];
+
+ run_func gen_run;
+ run_elts_func gen_run_elts;
+
+};
+
+static int get_offset( const void *a, const void *b )
+{
+ return (const char *)b - (const char *)a;
+}
+
+
+
+static struct x86_reg get_identity( struct translate_sse *p )
+{
+ struct x86_reg reg = x86_make_reg(file_XMM, 6);
+
+ if (!p->loaded_identity) {
+ /* Nasty:
+ */
+ struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
+
+ p->loaded_identity = TRUE;
+ p->identity[0] = 0;
+ p->identity[1] = 0;
+ p->identity[2] = 0;
+ p->identity[3] = 1;
+
+ sse_movups(p->func, reg,
+ x86_make_disp(translateESI,
+ get_offset(p, &p->identity[0])));
+ }
+
+ return reg;
+}
+
+static struct x86_reg get_255( struct translate_sse *p )
+{
+ struct x86_reg reg = x86_make_reg(file_XMM, 6);
+
+ if (!p->loaded_255) {
+ struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
+
+ p->loaded_255 = TRUE;
+ p->float_255[0] =
+ p->float_255[1] =
+ p->float_255[2] =
+ p->float_255[3] = 255.0f;
+
+ sse_movups(p->func, reg,
+ x86_make_disp(translateESI,
+ get_offset(p, &p->float_255[0])));
+ }
+
+ return reg;
+ return x86_make_reg(file_XMM, 7);
+}
+
+static struct x86_reg get_inv_255( struct translate_sse *p )
+{
+ struct x86_reg reg = x86_make_reg(file_XMM, 5);
+
+ if (!p->loaded_inv_255) {
+ struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
+
+ p->loaded_inv_255 = TRUE;
+ p->inv_255[0] =
+ p->inv_255[1] =
+ p->inv_255[2] =
+ p->inv_255[3] = 1.0f / 255.0f;
+
+ sse_movups(p->func, reg,
+ x86_make_disp(translateESI,
+ get_offset(p, &p->inv_255[0])));
+ }
+
+ return reg;
+}
+
+
+static void emit_load_R32G32B32A32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0 )
+{
+ sse_movups(p->func, data, arg0);
+}
+
+static void emit_load_R32G32B32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0 )
+{
+ /* Have to jump through some hoops:
+ *
+ * c 0 0 0
+ * c 0 0 1
+ * 0 0 c 1
+ * a b c 1
+ */
+ sse_movss(p->func, data, x86_make_disp(arg0, 8));
+ sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
+ sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
+ sse_movlps(p->func, data, arg0);
+}
+
+static void emit_load_R32G32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0 )
+{
+ /* 0 0 0 1
+ * a b 0 1
+ */
+ sse_movups(p->func, data, get_identity(p) );
+ sse_movlps(p->func, data, arg0);
+}
+
+
+static void emit_load_R32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0 )
+{
+ /* a 0 0 0
+ * a 0 0 1
+ */
+ sse_movss(p->func, data, arg0);
+ sse_orps(p->func, data, get_identity(p) );
+}
+
+
+static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg src )
+{
+
+ /* Load and unpack twice:
+ */
+ sse_movss(p->func, data, src);
+ sse2_punpcklbw(p->func, data, get_identity(p));
+ sse2_punpcklbw(p->func, data, get_identity(p));
+
+ /* Convert to float:
+ */
+ sse2_cvtdq2ps(p->func, data, data);
+
+
+ /* Scale by 1/255.0
+ */
+ sse_mulps(p->func, data, get_inv_255(p));
+}
+
+
+
+
+static void emit_store_R32G32B32A32( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ sse_movups(p->func, dest, dataXMM);
+}
+
+static void emit_store_R32G32B32( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ /* Emit two, shuffle, emit one.
+ */
+ sse_movlps(p->func, dest, dataXMM);
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
+ sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
+}
+
+static void emit_store_R32G32( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ sse_movlps(p->func, dest, dataXMM);
+}
+
+static void emit_store_R32( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ sse_movss(p->func, dest, dataXMM);
+}
+
+
+
+static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ /* Scale by 255.0
+ */
+ sse_mulps(p->func, dataXMM, get_255(p));
+
+ /* Pack and emit:
+ */
+ sse2_cvtps2dq(p->func, dataXMM, dataXMM);
+ sse2_packssdw(p->func, dataXMM, dataXMM);
+ sse2_packuswb(p->func, dataXMM, dataXMM);
+ sse_movss(p->func, dest, dataXMM);
+}
+
+
+
+
+
+static void get_src_ptr( struct translate_sse *p,
+ struct x86_reg srcEAX,
+ struct x86_reg translateREG,
+ struct x86_reg eltREG,
+ unsigned a )
+{
+ struct x86_reg input_ptr =
+ x86_make_disp(translateREG,
+ get_offset(p, &p->attrib[a].input_ptr));
+
+ struct x86_reg input_stride =
+ x86_make_disp(translateREG,
+ get_offset(p, &p->attrib[a].input_stride));
+
+ /* Calculate pointer to current attrib:
+ */
+ x86_mov(p->func, srcEAX, input_stride);
+ x86_imul(p->func, srcEAX, eltREG);
+ x86_add(p->func, srcEAX, input_ptr);
+}
+
+
+/* Extended swizzles? Maybe later.
+ */
+static void emit_swizzle( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg src,
+ unsigned shuffle )
+{
+ sse_shufps(p->func, dest, src, shuffle);
+}
+
+
+static boolean translate_attr( struct translate_sse *p,
+ const struct translate_element *a,
+ struct x86_reg srcECX,
+ struct x86_reg dstEAX)
+{
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+
+ switch (a->input_format) {
+ case PIPE_FORMAT_R32_FLOAT:
+ emit_load_R32(p, dataXMM, srcECX);
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ emit_load_R32G32(p, dataXMM, srcECX);
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ emit_load_R32G32B32(p, dataXMM, srcECX);
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ emit_load_R32G32B32A32(p, dataXMM, srcECX);
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
+ emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
+ break;
+ default:
+ return FALSE;
+ }
+
+ switch (a->output_format) {
+ case PIPE_FORMAT_R32_FLOAT:
+ emit_store_R32(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ emit_store_R32G32(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ emit_store_R32G32B32(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ emit_store_R32G32B32A32(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+ emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
+ break;
+ default:
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/* Build run( struct translate *translate,
+ * unsigned start,
+ * unsigned count,
+ * void *output_buffer )
+ * or
+ * run_elts( struct translate *translate,
+ * unsigned *elts,
+ * unsigned count,
+ * void *output_buffer )
+ *
+ * Lots of hardcoding
+ *
+ * EAX -- pointer to current output vertex
+ * ECX -- pointer to current attribute
+ *
+ */
+static boolean build_vertex_emit( struct translate_sse *p,
+ struct x86_function *func,
+ boolean linear )
+{
+ struct x86_reg vertexECX = x86_make_reg(file_REG32, reg_AX);
+ struct x86_reg idxEBX = x86_make_reg(file_REG32, reg_BX);
+ struct x86_reg srcEAX = x86_make_reg(file_REG32, reg_CX);
+ struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP);
+ struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
+ int fixup, label;
+ unsigned j;
+
+ p->func = func;
+ p->loaded_inv_255 = FALSE;
+ p->loaded_255 = FALSE;
+ p->loaded_identity = FALSE;
+
+ x86_init_func(p->func);
+
+ /* Push a few regs?
+ */
+ x86_push(p->func, countEBP);
+ x86_push(p->func, translateESI);
+ x86_push(p->func, idxEBX);
+
+ /* Get vertex count, compare to zero
+ */
+ x86_xor(p->func, idxEBX, idxEBX);
+ x86_mov(p->func, countEBP, x86_fn_arg(p->func, 3));
+ x86_cmp(p->func, countEBP, idxEBX);
+ fixup = x86_jcc_forward(p->func, cc_E);
+
+ /* If linear, idx is the current element, otherwise it is a pointer
+ * to the current element.
+ */
+ x86_mov(p->func, idxEBX, x86_fn_arg(p->func, 2));
+
+ /* Initialize destination register.
+ */
+ x86_mov(p->func, vertexECX, x86_fn_arg(p->func, 4));
+
+ /* Move argument 1 (translate_sse pointer) into a reg:
+ */
+ x86_mov(p->func, translateESI, x86_fn_arg(p->func, 1));
+
+
+ /* always load, needed or not:
+ */
+
+ /* Note address for loop jump */
+ label = x86_get_label(p->func);
+
+
+ for (j = 0; j < p->translate.key.nr_elements; j++) {
+ const struct translate_element *a = &p->translate.key.element[j];
+
+ struct x86_reg destEAX = x86_make_disp(vertexECX,
+ a->output_offset);
+
+ /* Figure out source pointer address:
+ */
+ if (linear) {
+ get_src_ptr(p, srcEAX, translateESI, idxEBX, j);
+ }
+ else {
+ get_src_ptr(p, srcEAX, translateESI, x86_deref(idxEBX), j);
+ }
+
+ if (!translate_attr( p, a, x86_deref(srcEAX), destEAX ))
+ return FALSE;
+ }
+
+ /* Next vertex:
+ */
+ x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride));
+
+ /* Incr index
+ */ /* Emit code for each of the attributes. Currently routes
+ * everything through SSE registers, even when it might be more
+ * efficient to stick with regular old x86. No optimization or
+ * other tricks - enough new ground to cover here just getting
+ * things working.
+ */
+
+ if (linear) {
+ x86_inc(p->func, idxEBX);
+ }
+ else {
+ x86_lea(p->func, idxEBX, x86_make_disp(idxEBX, 4));
+ }
+
+ /* decr count, loop if not zero
+ */
+ x86_dec(p->func, countEBP);
+ x86_test(p->func, countEBP, countEBP);
+ x86_jcc(p->func, cc_NZ, label);
+
+ /* Exit mmx state?
+ */
+ if (p->func->need_emms)
+ mmx_emms(p->func);
+
+ /* Land forward jump here:
+ */
+ x86_fixup_fwd_jump(p->func, fixup);
+
+ /* Pop regs and return
+ */
+
+ x86_pop(p->func, idxEBX);
+ x86_pop(p->func, translateESI);
+ x86_pop(p->func, countEBP);
+ x86_ret(p->func);
+
+ return TRUE;
+}
+
+
+
+
+
+
+
+static void translate_sse_set_buffer( struct translate *translate,
+ unsigned buf,
+ const void *ptr,
+ unsigned stride )
+{
+ struct translate_sse *p = (struct translate_sse *)translate;
+ unsigned i;
+
+ for (i = 0; i < p->translate.key.nr_elements; i++) {
+ if (p->translate.key.element[i].input_buffer == buf) {
+ p->attrib[i].input_ptr = ((char *)ptr +
+ p->translate.key.element[i].input_offset);
+ p->attrib[i].input_stride = stride;
+ }
+ }
+}
+
+
+static void translate_sse_release( struct translate *translate )
+{
+ struct translate_sse *p = (struct translate_sse *)translate;
+
+ x86_release_func( &p->linear_func );
+ x86_release_func( &p->elt_func );
+
+ FREE(p);
+}
+
+static void translate_sse_run_elts( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer )
+{
+ struct translate_sse *p = (struct translate_sse *)translate;
+
+ p->gen_run_elts( translate,
+ elts,
+ count,
+ output_buffer );
+}
+
+static void translate_sse_run( struct translate *translate,
+ unsigned start,
+ unsigned count,
+ void *output_buffer )
+{
+ struct translate_sse *p = (struct translate_sse *)translate;
+
+ p->gen_run( translate,
+ start,
+ count,
+ output_buffer );
+}
+
+
+struct translate *translate_sse2_create( const struct translate_key *key )
+{
+ struct translate_sse *p = NULL;
+
+ if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
+ goto fail;
+
+ p = CALLOC_STRUCT( translate_sse );
+ if (p == NULL)
+ goto fail;
+
+ p->translate.key = *key;
+ p->translate.release = translate_sse_release;
+ p->translate.set_buffer = translate_sse_set_buffer;
+ p->translate.run_elts = translate_sse_run_elts;
+ p->translate.run = translate_sse_run;
+
+ if (!build_vertex_emit(p, &p->linear_func, TRUE))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt_func, FALSE))
+ goto fail;
+
+ p->gen_run = (run_func)x86_get_func(&p->linear_func);
+ if (p->gen_run == NULL)
+ goto fail;
+
+ p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func);
+ if (p->gen_run_elts == NULL)
+ goto fail;
+
+ return &p->translate;
+
+ fail:
+ if (p)
+ translate_sse_release( &p->translate );
+
+ return NULL;
+}
+
+
+
+#else
+
+void translate_create_sse( const struct translate_key *key )
+{
+ return NULL;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
index 9b6c2708b6..05bc43131a 100644
--- a/src/gallium/auxiliary/util/Makefile
+++ b/src/gallium/auxiliary/util/Makefile
@@ -14,7 +14,9 @@ C_SOURCES = \
u_hash_table.c \
u_mm.c \
u_simple_shaders.c \
- u_snprintf.c
+ u_snprintf.c \
+ u_time.c \
+ u_mm.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
index 9b3929eb2d..d55d2c7081 100644
--- a/src/gallium/auxiliary/util/SConscript
+++ b/src/gallium/auxiliary/util/SConscript
@@ -15,6 +15,7 @@ util = env.ConvenienceLibrary(
'u_mm.c',
'u_simple_shaders.c',
'u_snprintf.c',
+ 'u_time.c',
])
auxiliaries.insert(0, util)
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c
index 090e3b7794..4ec1746662 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/p_debug.c
@@ -26,9 +26,11 @@
**************************************************************************/
+#include "pipe/p_config.h"
+
#include <stdarg.h>
-#ifdef WIN32
+#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
#include <windows.h>
#include <winddi.h>
#else
@@ -39,9 +41,11 @@
#include "pipe/p_compiler.h"
#include "pipe/p_util.h"
#include "pipe/p_debug.h"
+#include "pipe/p_format.h"
+#include "util/u_string.h"
-#ifdef WIN32
+#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
static INLINE void
_EngDebugPrint(const char *format, ...)
{
@@ -55,13 +59,18 @@ _EngDebugPrint(const char *format, ...)
void _debug_vprintf(const char *format, va_list ap)
{
-#ifdef WIN32
+#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
#ifndef WINCE
/* EngDebugPrint does not handle float point arguments, so we need to use
- * our own vsnprintf implementation */
- char buf[512 + 1];
- vsnprintf(buf, sizeof(buf), format, ap);
- _EngDebugPrint("%s", buf);
+ * our own vsnprintf implementation. It is also very slow, so buffer until
+ * we find a newline. */
+ static char buf[512 + 1] = {'\0'};
+ size_t len = strlen(buf);
+ int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap);
+ if(ret > (int)(sizeof(buf) - len - 1) || strchr(buf + len, '\n')) {
+ _EngDebugPrint("%s", buf);
+ buf[0] = '\0';
+ }
#else
/* TODO: Implement debug print for WINCE */
#endif
@@ -95,7 +104,7 @@ void _debug_break(void)
__asm("int3");
#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__)
_asm {int 3};
-#elif defined(WIN32) && !defined(WINCE)
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
EngDebugBreak();
#else
abort();
@@ -103,7 +112,7 @@ void _debug_break(void)
}
-#ifdef WIN32
+#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
static const char *
find(const char *start, const char *end, char c)
{
@@ -144,7 +153,7 @@ const char *
debug_get_option(const char *name, const char *dfault)
{
const char *result;
-#ifdef WIN32
+#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
ULONG_PTR iFile = 0;
const void *pMap = NULL;
const char *sol, *eol, *sep;
@@ -194,6 +203,8 @@ debug_get_bool_option(const char *name, boolean dfault)
if(str == NULL)
result = dfault;
+ else if(!strcmp(str, "n"))
+ result = FALSE;
else if(!strcmp(str, "no"))
result = FALSE;
else if(!strcmp(str, "0"))
@@ -245,57 +256,16 @@ debug_get_flags_option(const char *name,
}
-#if defined(WIN32)
-ULONG_PTR debug_config_file = 0;
-void *mapped_config_file = 0;
-
-enum {
- eAssertAbortEn = 0x1,
-};
-
-/* Check for aborts enabled. */
-static unsigned abort_en(void)
-{
- if (!mapped_config_file)
- {
- /* Open an 8 byte file for configuration data. */
- mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file);
- }
-
- /* A value of "0" (ascii) in the configuration file will clear the
- * first 8 bits in the test byte.
- *
- * A value of "1" (ascii) in the configuration file will set the
- * first bit in the test byte.
- *
- * A value of "2" (ascii) in the configuration file will set the
- * second bit in the test byte.
- *
- * Currently the only interesting values are 0 and 1, which clear
- * and set abort-on-assert behaviour respectively.
- */
- return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn;
-}
-#else /* WIN32 */
-static unsigned abort_en(void)
-{
- return !GETENV("GALLIUM_ABORT_ON_ASSERT");
-}
-#endif
-
void _debug_assert_fail(const char *expr,
const char *file,
unsigned line,
const char *function)
{
_debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr);
- if (abort_en())
- {
+ if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", TRUE))
debug_break();
- } else
- {
+ else
_debug_printf("continuing...\n");
- }
}
@@ -311,7 +281,7 @@ debug_dump_enum(const struct debug_named_value *names,
++names;
}
- snprintf(rest, sizeof(rest), "0x%08lx", value);
+ util_snprintf(rest, sizeof(rest), "0x%08lx", value);
return rest;
}
@@ -344,7 +314,7 @@ debug_dump_flags(const struct debug_named_value *names,
else
first = 0;
- snprintf(rest, sizeof(rest), "0x%08lx", value);
+ util_snprintf(rest, sizeof(rest), "0x%08lx", value);
strncat(output, rest, sizeof(output));
}
@@ -355,3 +325,101 @@ debug_dump_flags(const struct debug_named_value *names,
}
+
+
+
+
+char *pf_sprint_name( char *str, enum pipe_format format )
+{
+ strcpy( str, "PIPE_FORMAT_" );
+ switch (pf_layout( format )) {
+ case PIPE_FORMAT_LAYOUT_RGBAZS:
+ {
+ pipe_format_rgbazs_t rgbazs = (pipe_format_rgbazs_t) format;
+ uint i;
+ uint scale = 1 << (pf_exp8( rgbazs ) * 3);
+
+ for (i = 0; i < 4; i++) {
+ uint size = pf_size_xyzw( rgbazs, i );
+
+ if (size == 0) {
+ break;
+ }
+ switch (pf_swizzle_xyzw( rgbazs, i )) {
+ case PIPE_FORMAT_COMP_R:
+ strcat( str, "R" );
+ break;
+ case PIPE_FORMAT_COMP_G:
+ strcat( str, "G" );
+ break;
+ case PIPE_FORMAT_COMP_B:
+ strcat( str, "B" );
+ break;
+ case PIPE_FORMAT_COMP_A:
+ strcat( str, "A" );
+ break;
+ case PIPE_FORMAT_COMP_0:
+ strcat( str, "0" );
+ break;
+ case PIPE_FORMAT_COMP_1:
+ strcat( str, "1" );
+ break;
+ case PIPE_FORMAT_COMP_Z:
+ strcat( str, "Z" );
+ break;
+ case PIPE_FORMAT_COMP_S:
+ strcat( str, "S" );
+ break;
+ }
+ util_snprintf( &str[strlen( str )], 32, "%u", size * scale );
+ }
+ if (i != 0) {
+ strcat( str, "_" );
+ }
+ switch (pf_type( rgbazs )) {
+ case PIPE_FORMAT_TYPE_UNKNOWN:
+ strcat( str, "NONE" );
+ break;
+ case PIPE_FORMAT_TYPE_FLOAT:
+ strcat( str, "FLOAT" );
+ break;
+ case PIPE_FORMAT_TYPE_UNORM:
+ strcat( str, "UNORM" );
+ break;
+ case PIPE_FORMAT_TYPE_SNORM:
+ strcat( str, "SNORM" );
+ break;
+ case PIPE_FORMAT_TYPE_USCALED:
+ strcat( str, "USCALED" );
+ break;
+ case PIPE_FORMAT_TYPE_SSCALED:
+ strcat( str, "SSCALED" );
+ break;
+ }
+ }
+ break;
+ case PIPE_FORMAT_LAYOUT_YCBCR:
+ {
+ pipe_format_ycbcr_t ycbcr = (pipe_format_ycbcr_t) format;
+
+ strcat( str, "YCBCR" );
+ if (pf_rev( ycbcr )) {
+ strcat( str, "_REV" );
+ }
+ }
+ break;
+ }
+ return str;
+}
+
+
+#ifdef DEBUG
+void debug_print_format(const char *msg, unsigned fmt )
+{
+ char fmtstr[80];
+
+ pf_sprint_name(fmtstr, (enum pipe_format)fmt);
+
+ debug_printf("%s: %s\n", msg, fmtstr);
+}
+#endif
diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c
index c160afe5b7..3b5e4fbaee 100644
--- a/src/gallium/auxiliary/util/p_debug_mem.c
+++ b/src/gallium/auxiliary/util/p_debug_mem.c
@@ -32,9 +32,13 @@
* @author José Fonseca <jrfonseca@tungstengraphics.com>
*/
-#ifdef WIN32
+#include "pipe/p_config.h"
+
+#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
#include <windows.h>
#include <winddi.h>
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+#include <wdm.h>
#else
#include <stdio.h>
#include <stdlib.h>
@@ -47,9 +51,12 @@
#define DEBUG_MEMORY_MAGIC 0x6e34090aU
-#if defined(WIN32) && !defined(WINCE)
+#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE)
#define real_malloc(_size) EngAllocMem(0, _size, 'D3AG')
#define real_free(_ptr) EngFreeMem(_ptr)
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+#define real_malloc(_size) ExAllocatePool(0, _size)
+#define real_free(_ptr) ExFreePool(_ptr)
#else
#define real_malloc(_size) malloc(_size)
#define real_free(_ptr) free(_ptr)
@@ -70,8 +77,26 @@ struct debug_memory_header
static struct list_head list = { &list, &list };
-static unsigned long start_no = 0;
-static unsigned long end_no = 0;
+static unsigned long last_no = 0;
+
+
+static INLINE struct debug_memory_header *
+header_from_data(void *data)
+{
+ if(data)
+ return (struct debug_memory_header *)((char *)data - sizeof(struct debug_memory_header));
+ else
+ return NULL;
+}
+
+static INLINE void *
+data_from_header(struct debug_memory_header *hdr)
+{
+ if(hdr)
+ return (void *)((char *)hdr + sizeof(struct debug_memory_header));
+ else
+ return NULL;
+}
void *
@@ -84,7 +109,7 @@ debug_malloc(const char *file, unsigned line, const char *function,
if(!hdr)
return NULL;
- hdr->no = end_no++;
+ hdr->no = last_no++;
hdr->file = file;
hdr->line = line;
hdr->function = function;
@@ -93,7 +118,7 @@ debug_malloc(const char *file, unsigned line, const char *function,
LIST_ADDTAIL(&hdr->head, &list);
- return (void *)((char *)hdr + sizeof(*hdr));
+ return data_from_header(hdr);
}
void
@@ -105,7 +130,7 @@ debug_free(const char *file, unsigned line, const char *function,
if(!ptr)
return;
- hdr = (struct debug_memory_header *)((char *)ptr - sizeof(*hdr));
+ hdr = header_from_data(ptr);
if(hdr->magic != DEBUG_MEMORY_MAGIC) {
debug_printf("%s:%u:%s: freeing bad or corrupted memory %p\n",
file, line, function,
@@ -134,27 +159,57 @@ void *
debug_realloc(const char *file, unsigned line, const char *function,
void *old_ptr, size_t old_size, size_t new_size )
{
- void *new_ptr = NULL;
-
- if (new_size != 0) {
- new_ptr = debug_malloc( file, line, function, new_size );
-
- if( new_ptr && old_ptr )
- memcpy( new_ptr, old_ptr, old_size );
+ struct debug_memory_header *old_hdr, *new_hdr;
+ void *new_ptr;
+
+ if(!old_ptr)
+ return debug_malloc( file, line, function, new_size );
+
+ if(!new_size) {
+ debug_free( file, line, function, old_ptr );
+ return NULL;
+ }
+
+ old_hdr = header_from_data(old_ptr);
+ if(old_hdr->magic != DEBUG_MEMORY_MAGIC) {
+ debug_printf("%s:%u:%s: reallocating bad or corrupted memory %p\n",
+ file, line, function,
+ old_ptr);
+ debug_assert(0);
+ return NULL;
}
+
+ /* alloc new */
+ new_hdr = real_malloc(sizeof(*new_hdr) + new_size);
+ if(!new_hdr)
+ return NULL;
+ new_hdr->no = old_hdr->no;
+ new_hdr->file = old_hdr->file;
+ new_hdr->line = old_hdr->line;
+ new_hdr->function = old_hdr->function;
+ new_hdr->size = new_size;
+ new_hdr->magic = DEBUG_MEMORY_MAGIC;
+ LIST_REPLACE(&old_hdr->head, &new_hdr->head);
+
+ /* copy data */
+ new_ptr = data_from_header(new_hdr);
+ memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size );
+
+ /* free old */
+ old_hdr->magic = 0;
+ real_free(old_hdr);
- debug_free( file, line, function, old_ptr );
return new_ptr;
}
-void
-debug_memory_reset(void)
+unsigned long
+debug_memory_begin(void)
{
- start_no = end_no;
+ return last_no;
}
void
-debug_memory_report(void)
+debug_memory_end(unsigned long start_no)
{
struct list_head *entry;
@@ -163,8 +218,9 @@ debug_memory_report(void)
struct debug_memory_header *hdr;
void *ptr;
hdr = LIST_ENTRY(struct debug_memory_header, entry, head);
- ptr = (void *)((char *)hdr + sizeof(*hdr));
- if(hdr->no >= start_no)
+ ptr = data_from_header(hdr);
+ if(start_no <= hdr->no && hdr->no < last_no ||
+ last_no < start_no && (hdr->no < last_no || start_no <= hdr->no))
debug_printf("%s:%u:%s: %u bytes at %p not freed\n",
hdr->file, hdr->line, hdr->function,
hdr->size, ptr);
diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c
index 520da5cecd..63e1cc6013 100644
--- a/src/gallium/auxiliary/util/p_tile.c
+++ b/src/gallium/auxiliary/util/p_tile.c
@@ -169,6 +169,52 @@ a8r8g8b8_put_tile_rgba(unsigned *dst,
}
+/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
+
+static void
+x8r8g8b8_get_tile_rgba(unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ const unsigned pixel = *src++;
+ pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff);
+ pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff);
+ pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff);
+ pRow[3] = UBYTE_TO_FLOAT(0xff);
+ }
+ p += dst_stride;
+ }
+}
+
+
+static void
+x8r8g8b8_put_tile_rgba(unsigned *dst,
+ unsigned w, unsigned h,
+ const float *p,
+ unsigned src_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ const float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ unsigned r, g, b;
+ UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]);
+ *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b;
+ }
+ p += src_stride;
+ }
+}
+
+
/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
static void
@@ -339,7 +385,7 @@ z16_get_tile_rgba(ushort *src,
-/*** PIPE_FORMAT_U_L8 ***/
+/*** PIPE_FORMAT_L8_UNORM ***/
static void
l8_get_tile_rgba(ubyte *src,
@@ -362,7 +408,7 @@ l8_get_tile_rgba(ubyte *src,
}
-/*** PIPE_FORMAT_U_A8 ***/
+/*** PIPE_FORMAT_A8_UNORM ***/
static void
a8_get_tile_rgba(ubyte *src,
@@ -430,7 +476,7 @@ r16g16b16a16_put_tile_rgba(short *dst,
-/*** PIPE_FORMAT_U_I8 ***/
+/*** PIPE_FORMAT_I8_UNORM ***/
static void
i8_get_tile_rgba(ubyte *src,
@@ -453,7 +499,7 @@ i8_get_tile_rgba(ubyte *src,
}
-/*** PIPE_FORMAT_U_A8_L8 ***/
+/*** PIPE_FORMAT_A8L8_UNORM ***/
static void
a8_l8_get_tile_rgba(ushort *src,
@@ -647,6 +693,9 @@ pipe_get_tile_rgba(struct pipe_context *pipe,
case PIPE_FORMAT_A8R8G8B8_UNORM:
a8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
break;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ x8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
+ break;
case PIPE_FORMAT_B8G8R8A8_UNORM:
b8g8r8a8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
break;
@@ -659,16 +708,16 @@ pipe_get_tile_rgba(struct pipe_context *pipe,
case PIPE_FORMAT_R5G6B5_UNORM:
r5g6b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
break;
- case PIPE_FORMAT_U_L8:
+ case PIPE_FORMAT_L8_UNORM:
l8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride);
break;
- case PIPE_FORMAT_U_A8:
+ case PIPE_FORMAT_A8_UNORM:
a8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride);
break;
- case PIPE_FORMAT_U_I8:
+ case PIPE_FORMAT_I8_UNORM:
i8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride);
break;
- case PIPE_FORMAT_U_A8_L8:
+ case PIPE_FORMAT_A8L8_UNORM:
a8_l8_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
break;
case PIPE_FORMAT_R16G16B16A16_SNORM:
@@ -681,6 +730,7 @@ pipe_get_tile_rgba(struct pipe_context *pipe,
z32_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
break;
case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
s8z24_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
break;
case PIPE_FORMAT_Z24S8_UNORM:
@@ -723,6 +773,9 @@ pipe_put_tile_rgba(struct pipe_context *pipe,
case PIPE_FORMAT_A8R8G8B8_UNORM:
a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
break;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
+ break;
case PIPE_FORMAT_B8G8R8A8_UNORM:
b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
break;
@@ -734,16 +787,16 @@ pipe_put_tile_rgba(struct pipe_context *pipe,
break;
case PIPE_FORMAT_R8G8B8A8_UNORM:
break;
- case PIPE_FORMAT_U_L8:
+ case PIPE_FORMAT_L8_UNORM:
/*l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/
break;
- case PIPE_FORMAT_U_A8:
+ case PIPE_FORMAT_A8_UNORM:
/*a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/
break;
- case PIPE_FORMAT_U_I8:
+ case PIPE_FORMAT_I8_UNORM:
/*i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/
break;
- case PIPE_FORMAT_U_A8_L8:
+ case PIPE_FORMAT_A8L8_UNORM:
/*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/
break;
case PIPE_FORMAT_R16G16B16A16_SNORM:
@@ -756,6 +809,7 @@ pipe_put_tile_rgba(struct pipe_context *pipe,
/*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
break;
case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
/*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
break;
case PIPE_FORMAT_Z24S8_UNORM:
@@ -800,6 +854,7 @@ pipe_get_tile_z(struct pipe_context *pipe,
}
break;
case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
{
const uint *pSrc
= (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x);
@@ -860,6 +915,7 @@ pipe_put_tile_z(struct pipe_context *pipe,
}
break;
case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
{
uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x);
for (i = 0; i < h; i++) {
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index eec5e600c9..568d62ced1 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -57,6 +57,7 @@ struct blit_state
struct pipe_depth_stencil_alpha_state depthstencil;
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
+ struct pipe_viewport_state viewport;
struct pipe_shader_state vert_shader;
struct pipe_shader_state frag_shader;
@@ -100,7 +101,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
ctx->rasterizer.front_winding = PIPE_WINDING_CW;
ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
- ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */
+ ctx->rasterizer.bypass_clipping = 1;
/*ctx->rasterizer.bypass_vs = 1;*/
/* samplers */
@@ -113,8 +114,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->sampler.mag_img_filter = 0; /* set later */
ctx->sampler.normalized_coords = 1;
-#if 0
- /* viewport */
+ /* viewport (identity, we setup vertices in wincoords) */
ctx->viewport.scale[0] = 1.0;
ctx->viewport.scale[1] = 1.0;
ctx->viewport.scale[2] = 1.0;
@@ -123,7 +123,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->viewport.translate[1] = 0.0;
ctx->viewport.translate[2] = 0.0;
ctx->viewport.translate[3] = 0.0;
-#endif
/* vertex shader */
{
@@ -265,6 +264,9 @@ util_blit_pixels(struct blit_state *ctx,
dstY1 = tmp;
}
+ assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE));
+ assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE));
+
/*
* XXX for now we're always creating a temporary texture.
* Strictly speaking that's not always needed.
@@ -293,6 +295,8 @@ util_blit_pixels(struct blit_state *ctx,
src, srcLeft, srcTop, /* src */
srcW, srcH); /* size */
+ pipe->texture_update(pipe, tex, 0, 1 << 0);
+
/* save state (restored below) */
cso_save_blend(ctx->cso);
cso_save_depth_stencil_alpha(ctx->cso);
@@ -300,11 +304,15 @@ util_blit_pixels(struct blit_state *ctx,
cso_save_samplers(ctx->cso);
cso_save_sampler_textures(ctx->cso);
cso_save_framebuffer(ctx->cso);
+ cso_save_fragment_shader(ctx->cso);
+ cso_save_vertex_shader(ctx->cso);
+ cso_save_viewport(ctx->cso);
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+ cso_set_viewport(ctx->cso, &ctx->viewport);
/* sampler */
ctx->sampler.min_img_filter = filter;
@@ -313,11 +321,11 @@ util_blit_pixels(struct blit_state *ctx,
cso_single_sampler_done(ctx->cso);
/* texture */
- pipe->set_sampler_textures(pipe, 1, &tex);
+ cso_set_sampler_textures(ctx->cso, 1, &tex);
/* shaders */
- pipe->bind_fs_state(pipe, ctx->fs);
- pipe->bind_vs_state(pipe, ctx->vs);
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
+ cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
/* drawing dest */
memset(&fb, 0, sizeof(fb));
@@ -344,6 +352,9 @@ util_blit_pixels(struct blit_state *ctx,
cso_restore_samplers(ctx->cso);
cso_restore_sampler_textures(ctx->cso);
cso_restore_framebuffer(ctx->cso);
+ cso_restore_fragment_shader(ctx->cso);
+ cso_restore_vertex_shader(ctx->cso);
+ cso_restore_viewport(ctx->cso);
/* free the texture */
pipe_surface_reference(&texSurf, NULL);
diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h
index 61f1d9bb32..0ce9732e62 100644
--- a/src/gallium/auxiliary/util/u_blit.h
+++ b/src/gallium/auxiliary/util/u_blit.h
@@ -30,7 +30,11 @@
#define U_BLIT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct pipe_context;
struct pipe_surface;
struct cso_context;
@@ -58,4 +62,8 @@ util_blit_pixels(struct blit_state *ctx,
float z, uint filter);
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h
index 8cb10c9820..d108d92e52 100644
--- a/src/gallium/auxiliary/util/u_double_list.h
+++ b/src/gallium/auxiliary/util/u_double_list.h
@@ -70,6 +70,14 @@ struct list_head
(__list)->prev = (__item); \
} while(0)
+#define LIST_REPLACE(__from, __to) \
+ do { \
+ (__to)->prev = (__from)->prev; \
+ (__to)->next = (__from)->next; \
+ (__from)->next->prev = (__to); \
+ (__from)->prev->next = (__to); \
+ } while (0)
+
#define LIST_DEL(__item) \
do { \
(__item)->prev->next = (__item)->next; \
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 2fd214d22e..c53c512268 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -61,6 +61,7 @@ struct gen_mipmap_state
struct pipe_depth_stencil_alpha_state depthstencil;
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
+ struct pipe_viewport_state viewport;
struct pipe_shader_state vert_shader;
struct pipe_shader_state frag_shader;
@@ -474,7 +475,9 @@ format_to_type_comps(enum pipe_format pformat,
{
switch (pformat) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
*datatype = UBYTE;
*comps = 4;
return;
@@ -490,18 +493,21 @@ format_to_type_comps(enum pipe_format pformat,
*datatype = USHORT_5_6_5;
*comps = 3;
return;
- case PIPE_FORMAT_U_L8:
- case PIPE_FORMAT_U_A8:
- case PIPE_FORMAT_U_I8:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
*datatype = UBYTE;
*comps = 1;
return;
- case PIPE_FORMAT_U_A8_L8:
+ case PIPE_FORMAT_A8L8_UNORM:
*datatype = UBYTE;
*comps = 2;
return;
default:
assert(0);
+ *datatype = UBYTE;
+ *comps = 0;
+ break;
}
}
@@ -712,7 +718,7 @@ util_create_gen_mipmap(struct pipe_context *pipe,
memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
ctx->rasterizer.front_winding = PIPE_WINDING_CW;
ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
- ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */
+ ctx->rasterizer.bypass_clipping = 1;
/*ctx->rasterizer.bypass_vs = 1;*/
/* sampler state */
@@ -723,9 +729,7 @@ util_create_gen_mipmap(struct pipe_context *pipe,
ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
ctx->sampler.normalized_coords = 1;
-
-#if 0
- /* viewport */
+ /* viewport state (identity, verts are in wincoords) */
ctx->viewport.scale[0] = 1.0;
ctx->viewport.scale[1] = 1.0;
ctx->viewport.scale[2] = 1.0;
@@ -734,7 +738,6 @@ util_create_gen_mipmap(struct pipe_context *pipe,
ctx->viewport.translate[1] = 0.0;
ctx->viewport.translate[2] = 0.0;
ctx->viewport.translate[3] = 0.0;
-#endif
/* vertex shader */
{
@@ -775,23 +778,23 @@ set_vertex_data(struct gen_mipmap_state *ctx, float width, float height)
{
void *buf;
- ctx->vertices[0][0][0] = -0.5f; /*x*/
- ctx->vertices[0][0][1] = -0.5f; /*y*/
+ ctx->vertices[0][0][0] = 0.0f; /*x*/
+ ctx->vertices[0][0][1] = 0.0f; /*y*/
ctx->vertices[0][1][0] = 0.0f; /*s*/
ctx->vertices[0][1][1] = 0.0f; /*t*/
- ctx->vertices[1][0][0] = width - 0.5f; /*x*/
- ctx->vertices[1][0][1] = -0.5f; /*y*/
- ctx->vertices[1][1][0] = 1.0f; /*s*/
- ctx->vertices[1][1][1] = 0.0f; /*t*/
+ ctx->vertices[1][0][0] = width;
+ ctx->vertices[1][0][1] = 0.0f;
+ ctx->vertices[1][1][0] = 1.0f;
+ ctx->vertices[1][1][1] = 0.0f;
- ctx->vertices[2][0][0] = width - 0.5f;
- ctx->vertices[2][0][1] = height - 0.5f;
+ ctx->vertices[2][0][0] = width;
+ ctx->vertices[2][0][1] = height;
ctx->vertices[2][1][0] = 1.0f;
ctx->vertices[2][1][1] = 1.0f;
- ctx->vertices[3][0][0] = -0.5f;
- ctx->vertices[3][0][1] = height - 0.5f;
+ ctx->vertices[3][0][0] = 0.0f;
+ ctx->vertices[3][0][1] = height;
ctx->vertices[3][1][0] = 0.0f;
ctx->vertices[3][1][1] = 1.0f;
@@ -825,26 +828,6 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
}
-#if 0
-static void
-simple_viewport(struct pipe_context *pipe, uint width, uint height)
-{
- struct pipe_viewport_state vp;
-
- vp.scale[0] = 0.5 * width;
- vp.scale[1] = -0.5 * height;
- vp.scale[2] = 1.0;
- vp.scale[3] = 1.0;
- vp.translate[0] = 0.5 * width;
- vp.translate[1] = 0.5 * height;
- vp.translate[2] = 0.0;
- vp.translate[3] = 0.0;
-
- pipe->set_viewport_state(pipe, &vp);
-}
-#endif
-
-
/**
* Generate mipmap images. It's assumed all needed texture memory is
* already allocated.
@@ -880,17 +863,18 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_save_samplers(ctx->cso);
cso_save_sampler_textures(ctx->cso);
cso_save_framebuffer(ctx->cso);
+ cso_save_fragment_shader(ctx->cso);
+ cso_save_vertex_shader(ctx->cso);
+ cso_save_viewport(ctx->cso);
/* bind our state */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+ cso_set_viewport(ctx->cso, &ctx->viewport);
- pipe->bind_vs_state(pipe, ctx->vs);
- pipe->bind_fs_state(pipe, ctx->fs);
-#if 0
- pipe->set_viewport_state(pipe, &ctx->viewport);
-#endif
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
+ cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
/* init framebuffer state */
memset(&fb, 0, sizeof(fb));
@@ -926,11 +910,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
ctx->sampler.lod_bias = (float) srcLevel;
cso_single_sampler(ctx->cso, 0, &ctx->sampler);
cso_single_sampler_done(ctx->cso);
-#if 0
- simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]);
-#endif
- pipe->set_sampler_textures(pipe, 1, &pt);
+ cso_set_sampler_textures(ctx->cso, 1, &pt);
/* quad coords in window coords (bypassing clipping, viewport mapping) */
set_vertex_data(ctx,
@@ -954,4 +935,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_restore_samplers(ctx->cso);
cso_restore_sampler_textures(ctx->cso);
cso_restore_framebuffer(ctx->cso);
+ cso_restore_fragment_shader(ctx->cso);
+ cso_restore_vertex_shader(ctx->cso);
+ cso_restore_viewport(ctx->cso);
}
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h
index bd9af54fb7..3277024f07 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.h
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.h
@@ -31,6 +31,11 @@
#include "pipe/p_state.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
struct pipe_context;
struct pipe_texture;
struct cso_context;
@@ -52,4 +57,9 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
struct pipe_texture *pt,
uint face, uint baseLevel, uint lastLevel, uint filter);
+
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index cd13823985..0b917c005f 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -40,6 +40,94 @@
/**
+ * Pack ubyte R,G,B,A into dest pixel.
+ */
+static INLINE void
+util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a,
+ enum pipe_format format, void *dest)
+{
+ switch (format) {
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ {
+ uint *d = (uint *) dest;
+ *d = (r << 24) | (g << 16) | (b << 8) | a;
+ }
+ return;
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ {
+ uint *d = (uint *) dest;
+ *d = (r << 24) | (g << 16) | (b << 8) | 0xff;
+ }
+ return;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ {
+ uint *d = (uint *) dest;
+ *d = (a << 24) | (r << 16) | (g << 8) | b;
+ }
+ return;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ {
+ uint *d = (uint *) dest;
+ *d = (0xff << 24) | (r << 16) | (g << 8) | b;
+ }
+ return;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ {
+ uint *d = (uint *) dest;
+ *d = (b << 24) | (g << 16) | (r << 8) | a;
+ }
+ return;
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ {
+ uint *d = (uint *) dest;
+ *d = (b << 24) | (g << 16) | (r << 8) | 0xff;
+ }
+ return;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ {
+ ushort *d = (ushort *) dest;
+ *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
+ }
+ return;
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ {
+ ushort *d = (ushort *) dest;
+ *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
+ }
+ return;
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ {
+ ushort *d = (ushort *) dest;
+ *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
+ }
+ return;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ {
+ float *d = (float *) dest;
+ d[0] = (float)r / 255.0f;
+ d[1] = (float)g / 255.0f;
+ d[2] = (float)b / 255.0f;
+ d[3] = (float)a / 255.0f;
+ }
+ return;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ {
+ float *d = (float *) dest;
+ d[0] = (float)r / 255.0f;
+ d[1] = (float)g / 255.0f;
+ d[2] = (float)b / 255.0f;
+ }
+ return;
+
+ /* XXX lots more cases to add */
+ default:
+ debug_print_format("gallium: unhandled format in util_pack_color_ub()", format);
+ assert(0);
+ }
+}
+
+
+/**
* Note rgba outside [0,1] will be clamped for int pixel formats.
*/
static INLINE void
@@ -62,24 +150,54 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
*d = (r << 24) | (g << 16) | (b << 8) | a;
}
return;
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ {
+ uint *d = (uint *) dest;
+ *d = (r << 24) | (g << 16) | (b << 8) | 0xff;
+ }
+ return;
case PIPE_FORMAT_A8R8G8B8_UNORM:
{
uint *d = (uint *) dest;
*d = (a << 24) | (r << 16) | (g << 8) | b;
}
return;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ {
+ uint *d = (uint *) dest;
+ *d = (0xff << 24) | (r << 16) | (g << 8) | b;
+ }
+ return;
case PIPE_FORMAT_B8G8R8A8_UNORM:
{
uint *d = (uint *) dest;
*d = (b << 24) | (g << 16) | (r << 8) | a;
}
return;
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ {
+ uint *d = (uint *) dest;
+ *d = (b << 24) | (g << 16) | (r << 8) | 0xff;
+ }
+ return;
case PIPE_FORMAT_R5G6B5_UNORM:
{
ushort *d = (ushort *) dest;
*d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
}
return;
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ {
+ ushort *d = (ushort *) dest;
+ *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
+ }
+ return;
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ {
+ ushort *d = (ushort *) dest;
+ *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
+ }
+ return;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
{
float *d = (float *) dest;
@@ -99,7 +217,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
return;
/* XXX lots more cases to add */
default:
- debug_printf("gallium: unhandled format in util_pack_color()");
+ debug_print_format("gallium: unhandled format in util_pack_color()", format);
+ assert(0);
}
}
@@ -120,11 +239,14 @@ util_pack_z(enum pipe_format format, double z)
else
return (uint) (z * 0xffffffff);
case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
return (uint) (z * 0xffffff);
case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
return ((uint) (z * 0xffffff)) << 8;
default:
- debug_printf("gallium: unhandled fomrat in util_pack_z()");
+ debug_print_format("gallium: unhandled format in util_pack_z()", format);
+ assert(0);
return 0;
}
}
diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c
index 48426abcb7..c4f4bbd30c 100644
--- a/src/gallium/auxiliary/util/u_snprintf.c
+++ b/src/gallium/auxiliary/util/u_snprintf.c
@@ -166,8 +166,8 @@
#include <config.h>
#else
#ifdef WIN32
-#define vsnprintf rpl_vsnprintf
-#define snprintf rpl_snprintf
+#define vsnprintf util_vsnprintf
+#define snprintf util_snprintf
#define HAVE_VSNPRINTF 0
#define HAVE_SNPRINTF 0
#define HAVE_VASPRINTF 1 /* not needed */
@@ -445,7 +445,7 @@ static UINTMAX_T myround(LDOUBLE);
static LDOUBLE mypow10(int);
int
-rpl_vsnprintf(char *str, size_t size, const char *format, va_list args)
+util_vsnprintf(char *str, size_t size, const char *format, va_list args)
{
LDOUBLE fvalue;
INTMAX_T value;
@@ -1404,7 +1404,7 @@ mymemcpy(void *dst, void *src, size_t len)
#endif /* NEED_MYMEMCPY */
int
-rpl_vasprintf(char **ret, const char *format, va_list ap)
+util_vasprintf(char **ret, const char *format, va_list ap)
{
size_t size;
int len;
@@ -1422,10 +1422,10 @@ rpl_vasprintf(char **ret, const char *format, va_list ap)
#if !HAVE_SNPRINTF
#if HAVE_STDARG_H
int
-rpl_snprintf(char *str, size_t size, const char *format, ...)
+util_snprintf(char *str, size_t size, const char *format, ...)
#else
int
-rpl_snprintf(va_alist) va_dcl
+util_snprintf(va_alist) va_dcl
#endif /* HAVE_STDARG_H */
{
#if !HAVE_STDARG_H
@@ -1449,10 +1449,10 @@ rpl_snprintf(va_alist) va_dcl
#if !HAVE_ASPRINTF
#if HAVE_STDARG_H
int
-rpl_asprintf(char **ret, const char *format, ...)
+util_asprintf(char **ret, const char *format, ...)
#else
int
-rpl_asprintf(va_alist) va_dcl
+util_asprintf(va_alist) va_dcl
#endif /* HAVE_STDARG_H */
{
#if !HAVE_STDARG_H
diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h
new file mode 100644
index 0000000000..b99d4e8021
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_string.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Platform independent functions for string manipulation.
+ *
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#ifndef U_STRING_H_
+#define U_STRING_H_
+
+#ifndef WIN32
+#include <stdio.h>
+#endif
+#include <stddef.h>
+#include <stdarg.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#ifdef WIN32
+int util_vsnprintf(char *, size_t, const char *, va_list);
+int util_snprintf(char *str, size_t size, const char *format, ...);
+#else
+#define util_vsnprintf vsnprintf
+#define util_snprintf snprintf
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_STRING_H_ */
diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c
new file mode 100644
index 0000000000..9b97050d51
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_time.c
@@ -0,0 +1,162 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * OS independent time-manipulation functions.
+ *
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "util/u_time.h"
+
+#if defined(PIPE_OS_LINUX)
+#include <sys/time.h>
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
+#include <windows.h>
+#include <winddi.h>
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
+#include <windows.h>
+#else
+#error Unsupported OS
+#endif
+
+
+#if defined(PIPE_OS_WINDOWS)
+static LONGLONG frequency = 0;
+#if !defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
+#define EngQueryPerformanceFrequency(p) QueryPerformanceFrequency((LARGE_INTEGER*)(p))
+#define EngQueryPerformanceCounter(p) QueryPerformanceCounter((LARGE_INTEGER*)(p))
+#endif
+#endif
+
+
+void
+util_time_get(struct util_time *t)
+{
+#if defined(PIPE_OS_LINUX)
+ gettimeofday(&t->tv, NULL);
+#elif defined(PIPE_OS_WINDOWS)
+ EngQueryPerformanceCounter(&t->counter);
+#endif
+}
+
+
+void
+util_time_add(const struct util_time *t1,
+ int64_t usecs,
+ struct util_time *t2)
+{
+#if defined(PIPE_OS_LINUX)
+ t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000;
+ t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000;
+#elif defined(PIPE_OS_WINDOWS)
+ if(!frequency)
+ EngQueryPerformanceFrequency(&frequency);
+ t2->counter = t1->counter + (usecs * frequency + 999999LL)/1000000LL;
+#endif
+}
+
+
+int64_t
+util_time_diff(const struct util_time *t1,
+ const struct util_time *t2)
+{
+#if defined(PIPE_OS_LINUX)
+ return (t2->tv.tv_usec - t1->tv.tv_usec) +
+ (t2->tv.tv_sec - t1->tv.tv_sec)*1000000;
+#elif defined(PIPE_OS_WINDOWS)
+ if(!frequency)
+ EngQueryPerformanceFrequency(&frequency);
+ return (t2->counter - t1->counter)*1000000LL/frequency;
+#endif
+}
+
+
+/**
+ * Compare two time values.
+ *
+ * Not publicly available because it does not take in account wrap-arounds.
+ * Use util_time_timeout instead.
+ */
+static INLINE int
+util_time_compare(const struct util_time *t1,
+ const struct util_time *t2)
+{
+#if defined(PIPE_OS_LINUX)
+ if (t1->tv.tv_sec < t2->tv.tv_sec)
+ return -1;
+ else if(t1->tv.tv_sec > t2->tv.tv_sec)
+ return 1;
+ else if (t1->tv.tv_usec < t2->tv.tv_usec)
+ return -1;
+ else if(t1->tv.tv_usec > t2->tv.tv_usec)
+ return 1;
+ else
+ return 0;
+#elif defined(PIPE_OS_WINDOWS)
+ if (t1->counter < t2->counter)
+ return -1;
+ else if(t1->counter > t2->counter)
+ return 1;
+ else
+ return 0;
+#endif
+}
+
+
+boolean
+util_time_timeout(const struct util_time *start,
+ const struct util_time *end,
+ const struct util_time *curr)
+{
+ if(util_time_compare(start, end) <= 0)
+ return !(util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0);
+ else
+ return !(util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0);
+}
+
+
+#if defined(PIPE_OS_WINDOWS)
+void util_time_sleep(unsigned usecs)
+{
+ LONGLONG start, curr, end;
+
+ EngQueryPerformanceCounter(&start);
+
+ if(!frequency)
+ EngQueryPerformanceFrequency(&frequency);
+
+ end = start + (usecs * frequency + 999999LL)/1000000LL;
+
+ do {
+ EngQueryPerformanceCounter(&curr);
+ } while(start <= curr && curr < end ||
+ end < start && (curr < end || start <= curr));
+}
+#endif
diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h
new file mode 100644
index 0000000000..48ec7a4a96
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_time.h
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * OS independent time-manipulation functions.
+ *
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#ifndef U_TIME_H_
+#define U_TIME_H_
+
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_OS_LINUX)
+#include <time.h> /* timeval */
+#include <unistd.h> /* usleep */
+#endif
+
+#include "pipe/p_compiler.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Time abstraction.
+ *
+ * Do not access this structure directly. Use the provided function instead.
+ */
+struct util_time
+{
+#if defined(PIPE_OS_LINUX)
+ struct timeval tv;
+#else
+ long long counter;
+#endif
+};
+
+
+void
+util_time_get(struct util_time *t);
+
+void
+util_time_add(const struct util_time *t1,
+ int64_t usecs,
+ struct util_time *t2);
+
+int64_t
+util_time_diff(const struct util_time *t1,
+ const struct util_time *t2);
+
+/**
+ * Returns non-zero when the timeout expires.
+ */
+boolean
+util_time_timeout(const struct util_time *start,
+ const struct util_time *end,
+ const struct util_time *curr);
+
+#if defined(PIPE_OS_LINUX)
+#define util_time_sleep usleep
+#else
+void
+util_time_sleep(unsigned usecs);
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_TIME_H_ */