diff options
Diffstat (limited to 'src/gallium/auxiliary')
116 files changed, 9022 insertions, 8315 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 18acab0967..36dc46ff80 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -43,6 +43,9 @@ struct cso_cache { struct cso_hash *rasterizer_hash; struct cso_hash *sampler_hash; int max_size; + + cso_sanitize_callback sanitize_cb; + void *sanitize_data; }; #if 1 @@ -205,9 +208,20 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type) } } -static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, + +static INLINE void sanitize_hash(struct cso_cache *sc, + struct cso_hash *hash, + enum cso_cache_type type, int max_size) { + if (sc->sanitize_cb) + sc->sanitize_cb(hash, type, max_size, sc->sanitize_data); +} + + +static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type, + int max_size, void *user_data) +{ /* if we're approach the maximum size, remove fourth of the entries * otherwise every subsequent call will go through the same */ int hash_size = cso_hash_size(hash); @@ -231,7 +245,7 @@ cso_insert_state(struct cso_cache *sc, void *state) { struct cso_hash *hash = _cso_hash_for_type(sc, type); - sanitize_hash(hash, type, sc->max_size); + sanitize_hash(sc, hash, type, sc->max_size); return cso_hash_insert(hash, hash_key, state); } @@ -255,9 +269,9 @@ void *cso_hash_find_data_from_template( struct cso_hash *hash, while (!cso_hash_iter_is_null(iter)) { void *iter_data = cso_hash_iter_data(iter); if (!memcmp(iter_data, templ, size)) { - /* Return the payload: + /* We found a match */ - return (unsigned char *)iter_data + size; + return iter_data; } iter = cso_hash_iter_next(iter); } @@ -290,6 +304,8 @@ void * cso_take_state(struct cso_cache *sc, struct cso_cache *cso_cache_create(void) { struct cso_cache *sc = MALLOC_STRUCT(cso_cache); + if (sc == NULL) + return NULL; sc->max_size = 4096; sc->blend_hash = cso_hash_create(); @@ -298,6 +314,8 @@ struct cso_cache *cso_cache_create(void) sc->rasterizer_hash = cso_hash_create(); sc->fs_hash = cso_hash_create(); sc->vs_hash = cso_hash_create(); + sc->sanitize_cb = sanitize_cb; + sc->sanitize_data = 0; return sc; } @@ -332,10 +350,10 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, iter = cso_hash_first_node(hash); while (!cso_hash_iter_is_null(iter)) { void *state = cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); if (state) { func(state, user_data); } - iter = cso_hash_iter_next(iter); } } @@ -363,13 +381,13 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number) { sc->max_size = number; - sanitize_hash(sc->blend_hash, CSO_BLEND, sc->max_size); - sanitize_hash(sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA, + sanitize_hash(sc, sc->blend_hash, CSO_BLEND, sc->max_size); + sanitize_hash(sc, sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA, sc->max_size); - sanitize_hash(sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size); - sanitize_hash(sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); - sanitize_hash(sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); - sanitize_hash(sc->sampler_hash, CSO_SAMPLER, sc->max_size); + sanitize_hash(sc, sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size); + sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); + sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); + sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size); } int cso_maximum_cache_size(const struct cso_cache *sc) @@ -377,3 +395,11 @@ int cso_maximum_cache_size(const struct cso_cache *sc) return sc->max_size; } +void cso_cache_set_sanitize_callback(struct cso_cache *sc, + cso_sanitize_callback cb, + void *user_data) +{ + sc->sanitize_cb = cb; + sc->sanitize_data = user_data; +} + diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index e5edbbb556..6b5c230e8f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -84,8 +84,22 @@ extern "C" { #endif +enum cso_cache_type { + CSO_BLEND, + CSO_SAMPLER, + CSO_DEPTH_STENCIL_ALPHA, + CSO_RASTERIZER, + CSO_FRAGMENT_SHADER, + CSO_VERTEX_SHADER +}; + typedef void (*cso_state_callback)(void *ctx, void *obj); +typedef void (*cso_sanitize_callback)(struct cso_hash *hash, + enum cso_cache_type type, + int max_size, + void *user_data); + struct cso_cache; struct cso_blend { @@ -130,21 +144,15 @@ struct cso_sampler { struct pipe_context *context; }; - -enum cso_cache_type { - CSO_BLEND, - CSO_SAMPLER, - CSO_DEPTH_STENCIL_ALPHA, - CSO_RASTERIZER, - CSO_FRAGMENT_SHADER, - CSO_VERTEX_SHADER -}; - unsigned cso_construct_key(void *item, int item_size); struct cso_cache *cso_cache_create(void); void cso_cache_delete(struct cso_cache *sc); +void cso_cache_set_sanitize_callback(struct cso_cache *sc, + cso_sanitize_callback cb, + void *user_data); + struct cso_hash_iter cso_insert_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *state); diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 4a1a6cb79c..a1a3a9efaf 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -25,16 +25,20 @@ * **************************************************************************/ - /* Wrap the cso cache & hash mechanisms in a simplified + /** + * @file + * + * Wrap the cso cache & hash mechanisms in a simplified * pipe-driver-specific interface. * - * Authors: - * Zack Rusin <zack@tungstengraphics.com> - * Keith Whitwell <keith@tungstengraphics.com> + * @author Zack Rusin <zack@tungstengraphics.com> + * @author Keith Whitwell <keith@tungstengraphics.com> */ #include "pipe/p_state.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "tgsi/util/tgsi_parse.h" #include "cso_cache/cso_context.h" #include "cso_cache/cso_cache.h" @@ -76,6 +80,131 @@ struct cso_context { }; +static boolean delete_blend_state(struct cso_context *ctx, void *state) +{ + struct cso_blend *cso = (struct cso_blend *)state; + + if (ctx->blend == cso->data) + return FALSE; + + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_depth_stencil_state(struct cso_context *ctx, void *state) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + + if (ctx->depth_stencil == cso->data) + return FALSE; + + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + + return TRUE; +} + +static boolean delete_sampler_state(struct cso_context *ctx, void *state) +{ + struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_rasterizer_state(struct cso_context *ctx, void *state) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + + if (ctx->rasterizer == cso->data) + return FALSE; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_fs_state(struct cso_context *ctx, void *state) +{ + struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; + if (ctx->fragment_shader == cso->data) + return FALSE; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_vs_state(struct cso_context *ctx, void *state) +{ + struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; + if (ctx->vertex_shader == cso->data) + return TRUE; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return FALSE; +} + + +static INLINE boolean delete_cso(struct cso_context *ctx, + void *state, enum cso_cache_type type) +{ + switch (type) { + case CSO_BLEND: + return delete_blend_state(ctx, state); + break; + case CSO_SAMPLER: + return delete_sampler_state(ctx, state); + break; + case CSO_DEPTH_STENCIL_ALPHA: + return delete_depth_stencil_state(ctx, state); + break; + case CSO_RASTERIZER: + return delete_rasterizer_state(ctx, state); + break; + case CSO_FRAGMENT_SHADER: + return delete_fs_state(ctx, state); + break; + case CSO_VERTEX_SHADER: + return delete_vs_state(ctx, state); + break; + default: + assert(0); + FREE(state); + } + return FALSE; +} + +static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, + int max_size, void *user_data) +{ + struct cso_context *ctx = (struct cso_context *)user_data; + /* if we're approach the maximum size, remove fourth of the entries + * otherwise every subsequent call will go through the same */ + int hash_size = cso_hash_size(hash); + int max_entries = (max_size > hash_size) ? max_size : hash_size; + int to_remove = (max_size < max_entries) * max_entries/4; + struct cso_hash_iter iter = cso_hash_first_node(hash); + if (hash_size > max_size) + to_remove += hash_size - max_size; + while (to_remove) { + /*remove elements until we're good */ + /*fixme: currently we pick the nodes to remove at random*/ + void *cso = cso_hash_iter_data(iter); + if (delete_cso(ctx, cso, type)) { + iter = cso_hash_erase(hash, iter); + --to_remove; + } else + iter = cso_hash_iter_next(iter); + } +} + + struct cso_context *cso_create_context( struct pipe_context *pipe ) { struct cso_context *ctx = CALLOC_STRUCT(cso_context); @@ -85,6 +214,9 @@ struct cso_context *cso_create_context( struct pipe_context *pipe ) ctx->cache = cso_cache_create(); if (ctx->cache == NULL) goto out; + cso_cache_set_sanitize_callback(ctx->cache, + sanitize_hash, + ctx); ctx->pipe = pipe; @@ -98,8 +230,14 @@ out: return NULL; } -static void cso_release_all( struct cso_context *ctx ) + +/** + * Prior to context destruction, this function unbinds all state objects. + */ +void cso_release_all( struct cso_context *ctx ) { + unsigned i; + if (ctx->pipe) { ctx->pipe->bind_blend_state( ctx->pipe, NULL ); ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); @@ -109,6 +247,11 @@ static void cso_release_all( struct cso_context *ctx ) ctx->pipe->bind_vs_state( ctx->pipe, NULL ); } + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&ctx->textures[i], NULL); + pipe_texture_reference(&ctx->textures_saved[i], NULL); + } + if (ctx->cache) { cso_cache_delete( ctx->cache ); ctx->cache = NULL; @@ -118,10 +261,10 @@ static void cso_release_all( struct cso_context *ctx ) void cso_destroy_context( struct cso_context *ctx ) { - if (ctx) - cso_release_all( ctx ); - - FREE( ctx ); + if (ctx) { + //cso_release_all( ctx ); + FREE( ctx ); + } } @@ -135,8 +278,8 @@ void cso_destroy_context( struct cso_context *ctx ) * the data member of the cso to be the template itself. */ -void cso_set_blend(struct cso_context *ctx, - const struct pipe_blend_state *templ) +enum pipe_error cso_set_blend(struct cso_context *ctx, + const struct pipe_blend_state *templ) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, @@ -146,6 +289,8 @@ void cso_set_blend(struct cso_context *ctx, if (cso_hash_iter_is_null(iter)) { struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; cso->state = *templ; cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); @@ -153,6 +298,11 @@ void cso_set_blend(struct cso_context *ctx, cso->context = ctx->pipe; iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { @@ -163,6 +313,7 @@ void cso_set_blend(struct cso_context *ctx, ctx->blend = handle; ctx->pipe->bind_blend_state(ctx->pipe, handle); } + return PIPE_OK; } void cso_save_blend(struct cso_context *ctx) @@ -182,12 +333,12 @@ void cso_restore_blend(struct cso_context *ctx) -void cso_single_sampler(struct cso_context *ctx, - unsigned idx, - const struct pipe_sampler_state *templ) +enum pipe_error cso_single_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) { void *handle = NULL; - + if (templ != NULL) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, @@ -196,13 +347,20 @@ void cso_single_sampler(struct cso_context *ctx, if (cso_hash_iter_is_null(iter)) { struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); - + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + cso->state = *templ; cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; cso->context = ctx->pipe; iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { @@ -211,11 +369,12 @@ void cso_single_sampler(struct cso_context *ctx, } ctx->samplers[idx] = handle; + return PIPE_OK; } void cso_single_sampler_done( struct cso_context *ctx ) { - unsigned i; + unsigned i; /* find highest non-null sampler */ for (i = PIPE_MAX_SAMPLERS; i > 0; i--) { @@ -226,8 +385,8 @@ void cso_single_sampler_done( struct cso_context *ctx ) ctx->nr_samplers = i; if (ctx->hw.nr_samplers != ctx->nr_samplers || - memcmp(ctx->hw.samplers, - ctx->samplers, + memcmp(ctx->hw.samplers, + ctx->samplers, ctx->nr_samplers * sizeof(void *)) != 0) { memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); @@ -237,22 +396,36 @@ void cso_single_sampler_done( struct cso_context *ctx ) } } -void cso_set_samplers( struct cso_context *ctx, - unsigned nr, - const struct pipe_sampler_state **templates ) +/* + * If the function encouters any errors it will return the + * last one. Done to always try to set as many samplers + * as possible. + */ +enum pipe_error cso_set_samplers( struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates ) { unsigned i; - + enum pipe_error temp, error = PIPE_OK; + /* TODO: fastpath */ - for (i = 0; i < nr; i++) - cso_single_sampler( ctx, i, templates[i] ); + for (i = 0; i < nr; i++) { + temp = cso_single_sampler( ctx, i, templates[i] ); + if (temp != PIPE_OK) + error = temp; + } + + for ( ; i < ctx->nr_samplers; i++) { + temp = cso_single_sampler( ctx, i, NULL ); + if (temp != PIPE_OK) + error = temp; + } - for ( ; i < ctx->nr_samplers; i++) - cso_single_sampler( ctx, i, NULL ); - cso_single_sampler_done( ctx ); + + return error; } void cso_save_samplers(struct cso_context *ctx) @@ -263,44 +436,64 @@ void cso_save_samplers(struct cso_context *ctx) void cso_restore_samplers(struct cso_context *ctx) { - cso_set_samplers(ctx, ctx->nr_samplers_saved, - (const struct pipe_sampler_state **) ctx->samplers_saved); + ctx->nr_samplers = ctx->nr_samplers_saved; + memcpy(ctx->samplers, ctx->samplers_saved, sizeof(ctx->samplers)); + cso_single_sampler_done( ctx ); } -void cso_set_sampler_textures( struct cso_context *ctx, - uint count, - struct pipe_texture **textures ) +enum pipe_error cso_set_sampler_textures( struct cso_context *ctx, + uint count, + struct pipe_texture **textures ) { uint i; ctx->nr_textures = count; for (i = 0; i < count; i++) - ctx->textures[i] = textures[i]; + pipe_texture_reference(&ctx->textures[i], textures[i]); for ( ; i < PIPE_MAX_SAMPLERS; i++) - ctx->textures[i] = NULL; + pipe_texture_reference(&ctx->textures[i], NULL); ctx->pipe->set_sampler_textures(ctx->pipe, count, textures); + + return PIPE_OK; } void cso_save_sampler_textures( struct cso_context *ctx ) { + uint i; + ctx->nr_textures_saved = ctx->nr_textures; - memcpy(ctx->textures_saved, ctx->textures, sizeof(ctx->textures)); + for (i = 0; i < ctx->nr_textures; i++) { + assert(!ctx->textures_saved[i]); + pipe_texture_reference(&ctx->textures_saved[i], ctx->textures[i]); + } } void cso_restore_sampler_textures( struct cso_context *ctx ) { - cso_set_sampler_textures(ctx, ctx->nr_textures_saved, ctx->textures_saved); + uint i; + + ctx->nr_textures = ctx->nr_textures_saved; + + for (i = 0; i < ctx->nr_textures; i++) { + pipe_texture_reference(&ctx->textures[i], NULL); + ctx->textures[i] = ctx->textures_saved[i]; + ctx->textures_saved[i] = NULL; + } + for ( ; i < PIPE_MAX_SAMPLERS; i++) + pipe_texture_reference(&ctx->textures[i], NULL); + + ctx->pipe->set_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures); + ctx->nr_textures_saved = 0; } - -void cso_set_depth_stencil_alpha(struct cso_context *ctx, - const struct pipe_depth_stencil_alpha_state *templ) +enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, + const struct pipe_depth_stencil_alpha_state *templ) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_depth_stencil_alpha_state)); @@ -312,13 +505,20 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx, if (cso_hash_iter_is_null(iter)) { struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; cso->state = *templ; cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; cso->context = ctx->pipe; - cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); + iter = cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { @@ -329,6 +529,7 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx, ctx->depth_stencil = handle; ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); } + return PIPE_OK; } void cso_save_depth_stencil_alpha(struct cso_context *ctx) @@ -348,8 +549,8 @@ void cso_restore_depth_stencil_alpha(struct cso_context *ctx) -void cso_set_rasterizer(struct cso_context *ctx, - const struct pipe_rasterizer_state *templ) +enum pipe_error cso_set_rasterizer(struct cso_context *ctx, + const struct pipe_rasterizer_state *templ) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_rasterizer_state)); @@ -360,13 +561,20 @@ void cso_set_rasterizer(struct cso_context *ctx, if (cso_hash_iter_is_null(iter)) { struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; cso->state = *templ; cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; cso->context = ctx->pipe; - cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + iter = cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { @@ -377,6 +585,7 @@ void cso_set_rasterizer(struct cso_context *ctx, ctx->rasterizer = handle; ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); } + return PIPE_OK; } void cso_save_rasterizer(struct cso_context *ctx) @@ -395,36 +604,71 @@ void cso_restore_rasterizer(struct cso_context *ctx) } -void cso_set_fragment_shader(struct cso_context *ctx, - const struct pipe_shader_state *templ) + +enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, + void *handle ) { - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_shader_state)); + if (ctx->fragment_shader != handle) { + ctx->fragment_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ) +{ + if (handle == ctx->fragment_shader) { + /* unbind before deleting */ + ctx->pipe->bind_fs_state(ctx->pipe, NULL); + ctx->fragment_shader = NULL; + } + ctx->pipe->delete_fs_state(ctx->pipe, handle); +} + +/* Not really working: + */ +#if 0 +enum pipe_error cso_set_fragment_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + const struct tgsi_token *tokens = templ->tokens; + unsigned num_tokens = tgsi_num_tokens(tokens); + size_t tokens_size = num_tokens*sizeof(struct tgsi_token); + unsigned hash_key = cso_construct_key((void*)tokens, tokens_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_FRAGMENT_SHADER, - (void*)templ); + hash_key, + CSO_FRAGMENT_SHADER, + (void*)tokens); void *handle = NULL; if (cso_hash_iter_is_null(iter)) { - struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader)); + struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader) + tokens_size); + struct tgsi_token *cso_tokens = (struct tgsi_token *)((char *)cso + sizeof(*cso)); - cso->state = *templ; + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(cso_tokens, tokens, tokens_size); + cso->state.tokens = cso_tokens; cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state; cso->context = ctx->pipe; iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data; } - if (ctx->fragment_shader != handle) { - ctx->fragment_shader = handle; - ctx->pipe->bind_fs_state(ctx->pipe, handle); - } + return cso_set_fragment_shader_handle( ctx, handle ); } +#endif void cso_save_fragment_shader(struct cso_context *ctx) { @@ -434,7 +678,6 @@ void cso_save_fragment_shader(struct cso_context *ctx) void cso_restore_fragment_shader(struct cso_context *ctx) { - assert(ctx->fragment_shader_saved); if (ctx->fragment_shader_saved != ctx->fragment_shader) { ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved); ctx->fragment_shader = ctx->fragment_shader_saved; @@ -443,9 +686,32 @@ void cso_restore_fragment_shader(struct cso_context *ctx) } +enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx, + void *handle ) +{ + if (ctx->vertex_shader != handle) { + ctx->vertex_shader = handle; + ctx->pipe->bind_vs_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ) +{ + if (handle == ctx->vertex_shader) { + /* unbind before deleting */ + ctx->pipe->bind_vs_state(ctx->pipe, NULL); + ctx->vertex_shader = NULL; + } + ctx->pipe->delete_vs_state(ctx->pipe, handle); +} + -void cso_set_vertex_shader(struct cso_context *ctx, - const struct pipe_shader_state *templ) +/* Not really working: + */ +#if 0 +enum pipe_error cso_set_vertex_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_shader_state)); @@ -457,23 +723,31 @@ void cso_set_vertex_shader(struct cso_context *ctx, if (cso_hash_iter_is_null(iter)) { struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + cso->state = *templ; cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; cso->context = ctx->pipe; iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data; } - if (ctx->vertex_shader != handle) { - ctx->vertex_shader = handle; - ctx->pipe->bind_vs_state(ctx->pipe, handle); - } + return cso_set_vertex_shader_handle( ctx, handle ); } +#endif + + void cso_save_vertex_shader(struct cso_context *ctx) { @@ -483,9 +757,8 @@ void cso_save_vertex_shader(struct cso_context *ctx) void cso_restore_vertex_shader(struct cso_context *ctx) { - assert(ctx->vertex_shader_saved); if (ctx->vertex_shader_saved != ctx->vertex_shader) { - ctx->pipe->bind_fs_state(ctx->pipe, ctx->vertex_shader_saved); + ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved); ctx->vertex_shader = ctx->vertex_shader_saved; } ctx->vertex_shader_saved = NULL; @@ -493,14 +766,15 @@ void cso_restore_vertex_shader(struct cso_context *ctx) -void cso_set_framebuffer(struct cso_context *ctx, - const struct pipe_framebuffer_state *fb) +enum pipe_error cso_set_framebuffer(struct cso_context *ctx, + const struct pipe_framebuffer_state *fb) { /* XXX this memcmp() fails to detect buffer size changes */ if (1/*memcmp(&ctx->fb, fb, sizeof(*fb))*/) { ctx->fb = *fb; ctx->pipe->set_framebuffer_state(ctx->pipe, fb); } + return PIPE_OK; } void cso_save_framebuffer(struct cso_context *ctx) @@ -517,13 +791,14 @@ void cso_restore_framebuffer(struct cso_context *ctx) } -void cso_set_viewport(struct cso_context *ctx, - const struct pipe_viewport_state *vp) +enum pipe_error cso_set_viewport(struct cso_context *ctx, + const struct pipe_viewport_state *vp) { if (memcmp(&ctx->vp, vp, sizeof(*vp))) { ctx->vp = *vp; ctx->pipe->set_viewport_state(ctx->pipe, vp); } + return PIPE_OK; } void cso_save_viewport(struct cso_context *ctx) @@ -543,11 +818,12 @@ void cso_restore_viewport(struct cso_context *ctx) -void cso_set_blend_color(struct cso_context *ctx, - const struct pipe_blend_color *bc) +enum pipe_error cso_set_blend_color(struct cso_context *ctx, + const struct pipe_blend_color *bc) { if (memcmp(&ctx->blend_color, bc, sizeof(ctx->blend_color))) { ctx->blend_color = *bc; ctx->pipe->set_blend_color(ctx->pipe, bc); } + return PIPE_OK; } diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 665e8d9911..b04e98bfa1 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -31,6 +31,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" +#include "pipe/p_error.h" #ifdef __cplusplus @@ -41,51 +42,53 @@ struct cso_context; struct cso_context *cso_create_context( struct pipe_context *pipe ); +void cso_release_all( struct cso_context *ctx ); + void cso_destroy_context( struct cso_context *cso ); -void cso_set_blend( struct cso_context *cso, - const struct pipe_blend_state *blend ); +enum pipe_error cso_set_blend( struct cso_context *cso, + const struct pipe_blend_state *blend ); void cso_save_blend(struct cso_context *cso); void cso_restore_blend(struct cso_context *cso); -void cso_set_depth_stencil_alpha( struct cso_context *cso, - const struct pipe_depth_stencil_alpha_state *dsa ); +enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso, + const struct pipe_depth_stencil_alpha_state *dsa ); void cso_save_depth_stencil_alpha(struct cso_context *cso); void cso_restore_depth_stencil_alpha(struct cso_context *cso); -void cso_set_rasterizer( struct cso_context *cso, - const struct pipe_rasterizer_state *rasterizer ); +enum pipe_error cso_set_rasterizer( struct cso_context *cso, + const struct pipe_rasterizer_state *rasterizer ); void cso_save_rasterizer(struct cso_context *cso); void cso_restore_rasterizer(struct cso_context *cso); -void cso_set_samplers( struct cso_context *cso, - unsigned count, - const struct pipe_sampler_state **states ); +enum pipe_error cso_set_samplers( struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states ); void cso_save_samplers(struct cso_context *cso); void cso_restore_samplers(struct cso_context *cso); /* Alternate interface to support state trackers that like to modify * samplers one at a time: */ -void cso_single_sampler( struct cso_context *cso, - unsigned nr, - const struct pipe_sampler_state *states ); +enum pipe_error cso_single_sampler( struct cso_context *cso, + unsigned nr, + const struct pipe_sampler_state *states ); void cso_single_sampler_done( struct cso_context *cso ); -void cso_set_sampler_textures( struct cso_context *cso, - uint count, - struct pipe_texture **textures ); +enum pipe_error cso_set_sampler_textures( struct cso_context *cso, + uint count, + struct pipe_texture **textures ); void cso_save_sampler_textures( struct cso_context *cso ); void cso_restore_sampler_textures( struct cso_context *cso ); @@ -96,34 +99,43 @@ void cso_restore_sampler_textures( struct cso_context *cso ); * (eg mesa's internall-generated texenv programs), it will be up to * the state tracker to implement their own specialized caching. */ -void cso_set_fragment_shader( struct cso_context *cso, - const struct pipe_shader_state *shader ); +enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, + void *handle ); +void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ); +/* +enum pipe_error cso_set_fragment_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); +*/ void cso_save_fragment_shader(struct cso_context *cso); void cso_restore_fragment_shader(struct cso_context *cso); - -void cso_set_vertex_shader( struct cso_context *cso, - const struct pipe_shader_state *shader ); +enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx, + void *handle ); +void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ); +/* +enum pipe_error cso_set_vertex_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); +*/ void cso_save_vertex_shader(struct cso_context *cso); void cso_restore_vertex_shader(struct cso_context *cso); -void cso_set_framebuffer(struct cso_context *cso, - const struct pipe_framebuffer_state *fb); +enum pipe_error cso_set_framebuffer(struct cso_context *cso, + const struct pipe_framebuffer_state *fb); void cso_save_framebuffer(struct cso_context *cso); void cso_restore_framebuffer(struct cso_context *cso); -void cso_set_viewport(struct cso_context *cso, - const struct pipe_viewport_state *vp); +enum pipe_error cso_set_viewport(struct cso_context *cso, + const struct pipe_viewport_state *vp); void cso_save_viewport(struct cso_context *cso); void cso_restore_viewport(struct cso_context *cso); -void cso_set_blend_color(struct cso_context *cso, - const struct pipe_blend_color *bc); +enum pipe_error cso_set_blend_color(struct cso_context *cso, + const struct pipe_blend_color *bc); #ifdef __cplusplus diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index ddce3822f7..0646efd952 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -110,6 +110,10 @@ cso_hash_create_node(struct cso_hash *hash, struct cso_node **anextNode) { struct cso_node *node = cso_data_allocate_node(hash->data.d); + + if (!node) + return NULL; + node->key = akey; node->value = avalue; @@ -219,15 +223,30 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, { struct cso_node **nextNode = cso_hash_find_node(hash, key); struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); - struct cso_hash_iter iter = {hash, node}; - return iter; + if (!node) { + struct cso_hash_iter null_iter = {hash, 0}; + return null_iter; + } + + { + struct cso_hash_iter iter = {hash, node}; + return iter; + } } } struct cso_hash * cso_hash_create(void) { struct cso_hash *hash = MALLOC_STRUCT(cso_hash); + if (!hash) + return NULL; + hash->data.d = MALLOC_STRUCT(cso_hash_data); + if (!hash->data.d) { + FREE(hash); + return NULL; + } + hash->data.d->fakeNext = 0; hash->data.d->buckets = 0; hash->data.d->size = 0; diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index 73c4742006..85f3e276c6 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -106,12 +106,12 @@ struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); /** - * Convenience routine to iterate over the collision list while doing a memory + * Convenience routine to iterate over the collision list while doing a memory * comparison to see which entry in the list is a direct copy of our template * and returns that entry. */ void *cso_hash_find_data_from_template( struct cso_hash *hash, - unsigned hash_key, + unsigned hash_key, void *templ, int size ); diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 28262a92c6..da7eded21f 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -4,39 +4,37 @@ include $(TOP)/configs/current LIBNAME = draw C_SOURCES = \ - draw_aaline.c \ - draw_aapoint.c \ - draw_clip.c \ - draw_vs_exec.c \ - draw_vs_sse.c \ - draw_vs_llvm.c \ - draw_context.c\ - draw_cull.c \ - draw_debug.c \ - draw_flatshade.c \ - draw_offset.c \ + draw_context.c \ + draw_pipe.c \ + draw_pipe_aaline.c \ + draw_pipe_aapoint.c \ + draw_pipe_clip.c \ + draw_pipe_cull.c \ + draw_pipe_flatshade.c \ + draw_pipe_offset.c \ + draw_pipe_pstipple.c \ + draw_pipe_stipple.c \ + draw_pipe_twoside.c \ + draw_pipe_unfilled.c \ + draw_pipe_util.c \ + draw_pipe_validate.c \ + draw_pipe_vbuf.c \ + draw_pipe_wide_line.c \ + draw_pipe_wide_point.c \ draw_pt.c \ - draw_pt_vcache.c \ - draw_pt_fetch_emit.c \ - draw_pt_fetch_pipeline.c \ - draw_pt_pipeline.c \ draw_pt_elts.c \ - draw_prim.c \ - draw_pstipple.c \ - draw_stipple.c \ - draw_twoside.c \ - draw_unfilled.c \ - draw_validate.c \ - draw_vbuf.c \ + draw_pt_emit.c \ + draw_pt_fetch.c \ + draw_pt_fetch_emit.c \ + draw_pt_fetch_shade_pipeline.c \ + draw_pt_post_vs.c \ + draw_pt_varray.c \ + draw_pt_vcache.c \ draw_vertex.c \ - draw_vertex_cache.c \ - draw_vertex_fetch.c \ - draw_vertex_shader.c \ - draw_vf.c \ - draw_vf_generic.c \ - draw_vf_sse.c \ - draw_wide_line.c \ - draw_wide_point.c + draw_vs.c \ + draw_vs_exec.c \ + draw_vs_llvm.c \ + draw_vs_sse.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 52107912f5..3b5d5ed492 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -3,39 +3,37 @@ Import('*') draw = env.ConvenienceLibrary( target = 'draw', source = [ - 'draw_aaline.c', - 'draw_aapoint.c', - 'draw_clip.c', - 'draw_vs_exec.c', - 'draw_vs_sse.c', - 'draw_vs_llvm.c', 'draw_context.c', - 'draw_cull.c', - 'draw_debug.c', - 'draw_flatshade.c', - 'draw_offset.c', + 'draw_pipe.c', + 'draw_pipe_aaline.c', + 'draw_pipe_aapoint.c', + 'draw_pipe_clip.c', + 'draw_pipe_cull.c', + 'draw_pipe_flatshade.c', + 'draw_pipe_offset.c', + 'draw_pipe_pstipple.c', + 'draw_pipe_stipple.c', + 'draw_pipe_twoside.c', + 'draw_pipe_unfilled.c', + 'draw_pipe_util.c', + 'draw_pipe_validate.c', + 'draw_pipe_vbuf.c', + 'draw_pipe_wide_line.c', + 'draw_pipe_wide_point.c', 'draw_pt.c', - 'draw_pt_vcache.c', - 'draw_pt_fetch_emit.c', - 'draw_pt_fetch_pipeline.c', - 'draw_pt_pipeline.c', 'draw_pt_elts.c', - 'draw_prim.c', - 'draw_pstipple.c', - 'draw_stipple.c', - 'draw_twoside.c', - 'draw_unfilled.c', - 'draw_validate.c', - 'draw_vbuf.c', + 'draw_pt_emit.c', + 'draw_pt_fetch.c', + 'draw_pt_fetch_emit.c', + 'draw_pt_fetch_shade_pipeline.c', + 'draw_pt_post_vs.c', + 'draw_pt_varray.c', + 'draw_pt_vcache.c', 'draw_vertex.c', - 'draw_vertex_cache.c', - 'draw_vertex_fetch.c', - 'draw_vertex_shader.c', - 'draw_vf.c', - 'draw_vf_generic.c', - 'draw_vf_sse.c', - 'draw_wide_point.c', - 'draw_wide_line.c' + 'draw_vs.c', + 'draw_vs_exec.c', + 'draw_vs_llvm.c', + 'draw_vs_sse.c', ]) auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index b3c65c90d6..98e23fa830 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -33,8 +33,10 @@ #include "pipe/p_util.h" #include "draw_context.h" -#include "draw_private.h" #include "draw_vbuf.h" +#include "draw_vs.h" +#include "draw_pt.h" +#include "draw_pipe.h" struct draw_context *draw_create( void ) @@ -43,38 +45,6 @@ struct draw_context *draw_create( void ) if (draw == NULL) goto fail; -#if defined(__i386__) || defined(__386__) - draw->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; -#else - draw->use_sse = FALSE; -#endif - - /* create pipeline stages */ - draw->pipeline.wide_line = draw_wide_line_stage( draw ); - draw->pipeline.wide_point = draw_wide_point_stage( draw ); - draw->pipeline.stipple = draw_stipple_stage( draw ); - draw->pipeline.unfilled = draw_unfilled_stage( draw ); - draw->pipeline.twoside = draw_twoside_stage( draw ); - draw->pipeline.offset = draw_offset_stage( draw ); - draw->pipeline.clip = draw_clip_stage( draw ); - draw->pipeline.flatshade = draw_flatshade_stage( draw ); - draw->pipeline.cull = draw_cull_stage( draw ); - draw->pipeline.validate = draw_validate_stage( draw ); - draw->pipeline.first = draw->pipeline.validate; - - if (!draw->pipeline.wide_line || - !draw->pipeline.wide_point || - !draw->pipeline.stipple || - !draw->pipeline.unfilled || - !draw->pipeline.twoside || - !draw->pipeline.offset || - !draw->pipeline.clip || - !draw->pipeline.flatshade || - !draw->pipeline.cull || - !draw->pipeline.validate) - goto fail; - - ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -83,31 +53,18 @@ struct draw_context *draw_create( void ) ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */ draw->nr_planes = 6; - /* Statically allocate maximum sized vertices for the cache - could be cleverer... - */ - { - uint i; - const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; - char *tmp = align_malloc(Elements(draw->vs.queue) * size, 16); - if (!tmp) - goto fail; - - for (i = 0; i < Elements(draw->vs.queue); i++) - draw->vs.queue[i].vertex = (struct vertex_header *)(tmp + i * size); - } - draw->shader_queue_flush = draw_vertex_shader_queue_flush; + draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ - /* these defaults are oriented toward the needs of softpipe */ - draw->wide_point_threshold = 1000000.0; /* infinity */ - draw->wide_line_threshold = 1.0; - draw->line_stipple = TRUE; - draw->point_sprite = TRUE; + tgsi_exec_machine_init(&draw->machine); - draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ + /* FIXME: give this machine thing a proper constructor: + */ + draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); - draw_vertex_cache_invalidate( draw ); - draw_set_mapped_element_buffer( draw, 0, NULL ); + if (!draw_pipeline_init( draw )) + goto fail; if (!draw_pt_init( draw )) goto fail; @@ -125,39 +82,14 @@ void draw_destroy( struct draw_context *draw ) if (!draw) return; - if (draw->pipeline.wide_line) - draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); - if (draw->pipeline.wide_point) - draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); - if (draw->pipeline.stipple) - draw->pipeline.stipple->destroy( draw->pipeline.stipple ); - if (draw->pipeline.unfilled) - draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); - if (draw->pipeline.twoside) - draw->pipeline.twoside->destroy( draw->pipeline.twoside ); - if (draw->pipeline.offset) - draw->pipeline.offset->destroy( draw->pipeline.offset ); - if (draw->pipeline.clip) - draw->pipeline.clip->destroy( draw->pipeline.clip ); - if (draw->pipeline.flatshade) - draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); - if (draw->pipeline.cull) - draw->pipeline.cull->destroy( draw->pipeline.cull ); - if (draw->pipeline.validate) - draw->pipeline.validate->destroy( draw->pipeline.validate ); - if (draw->pipeline.aaline) - draw->pipeline.aaline->destroy( draw->pipeline.aaline ); - if (draw->pipeline.aapoint) - draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); - if (draw->pipeline.pstipple) - draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); - if (draw->pipeline.rasterize) - draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); + + if (draw->machine.Inputs) + align_free(draw->machine.Inputs); + + if (draw->machine.Outputs) + align_free(draw->machine.Outputs); tgsi_exec_machine_free_data(&draw->machine); - - if (draw->vs.queue[0].vertex) - align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ /* Not so fast -- we're just borrowing this at the moment. * @@ -165,6 +97,7 @@ void draw_destroy( struct draw_context *draw ) draw->render->destroy( draw->render ); */ + draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); FREE( draw ); @@ -189,6 +122,20 @@ void draw_set_rasterizer_state( struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->rasterizer = raster; + draw->bypass_clipping = + ((draw->rasterizer && draw->rasterizer->bypass_clipping) || + draw->driver.bypass_clipping); +} + + +void draw_set_driver_clipping( struct draw_context *draw, + boolean bypass_clipping ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + draw->driver.bypass_clipping = bypass_clipping; + draw->bypass_clipping = (draw->rasterizer->bypass_clipping || + draw->driver.bypass_clipping); } @@ -247,9 +194,8 @@ draw_set_vertex_buffers(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - - memcpy(draw->vertex_buffer, buffers, count * sizeof(buffers[0])); + memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0])); + draw->pt.nr_vertex_buffers = count; } @@ -260,9 +206,8 @@ draw_set_vertex_elements(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - - memcpy(draw->vertex_element, elements, count * sizeof(elements[0])); + memcpy(draw->pt.vertex_element, elements, count * sizeof(elements[0])); + draw->pt.nr_vertex_elements = count; } @@ -273,8 +218,7 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer) { - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - draw->user.vbuffer[attr] = buffer; + draw->pt.user.vbuffer[attr] = buffer; } @@ -282,8 +226,7 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, const void *buffer) { - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - draw->user.constants = buffer; + draw->pt.user.constants = buffer; } @@ -295,7 +238,7 @@ void draw_wide_point_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->wide_point_threshold = threshold; + draw->pipeline.wide_point_threshold = threshold; } @@ -307,7 +250,7 @@ void draw_wide_line_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->wide_line_threshold = threshold; + draw->pipeline.wide_line_threshold = threshold; } @@ -318,7 +261,7 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->line_stipple = enable; + draw->pipeline.line_stipple = enable; } @@ -329,7 +272,7 @@ void draw_enable_point_sprites(struct draw_context *draw, boolean enable) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->point_sprite = enable; + draw->pipeline.point_sprite = enable; } @@ -384,79 +327,56 @@ draw_num_vs_outputs(struct draw_context *draw) } -/** - * Allocate space for temporary post-transform vertices, such as for clipping. - */ -void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) -{ - assert(!stage->tmp); - - stage->nr_tmps = nr; - - if (nr) { - ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); - unsigned i; - - stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); - - for (i = 0; i < nr; i++) - stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); - } -} - -void draw_free_temp_verts( struct draw_stage *stage ) +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ) { - if (stage->tmp) { - FREE( stage->tmp[0] ); - FREE( stage->tmp ); - stage->tmp = NULL; - } + draw->render = render; } - -boolean draw_use_sse(struct draw_context *draw) +void draw_set_edgeflags( struct draw_context *draw, + const unsigned *edgeflag ) { - return (boolean) draw->use_sse; + draw->pt.user.edgeflag = edgeflag; } -void draw_reset_vertex_ids(struct draw_context *draw) -{ - struct draw_stage *stage = draw->pipeline.first; - - while (stage) { - unsigned i; - - for (i = 0; i < stage->nr_tmps; i++) - stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; - stage = stage->next; - } - draw_vertex_cache_reset_vertex_ids(draw); /* going away soon */ - draw_pt_reset_vertex_ids(draw); +/** + * Tell the drawing context about the index/element buffer to use + * (ala glDrawElements) + * If no element buffer is to be used (i.e. glDrawArrays) then this + * should be called with eltSize=0 and elements=NULL. + * + * \param draw the drawing context + * \param eltSize size of each element (1, 2 or 4 bytes) + * \param elements the element buffer ptr + */ +void +draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, void *elements ) +{ + draw->pt.user.elts = elements; + draw->pt.user.eltSize = eltSize; } -void draw_set_render( struct draw_context *draw, - struct vbuf_render *render ) + +/* Revamp me please: + */ +void draw_do_flush( struct draw_context *draw, unsigned flags ) { - draw->render = render; -} + if (!draw->suspend_flushing) + { + assert(!draw->flushing); /* catch inadvertant recursion */ -void draw_set_edgeflags( struct draw_context *draw, - const unsigned *edgeflag ) -{ - draw->user.edgeflag = edgeflag; -} + draw->flushing = TRUE; + draw_pipeline_flush( draw, flags ); -boolean draw_get_edgeflag( struct draw_context *draw, - unsigned idx ) -{ - if (draw->user.edgeflag) - return (draw->user.edgeflag[idx/32] & (1 << (idx%32))) != 0; - else - return 1; + draw->reduced_prim = ~0; /* is reduced_prim needed any more? */ + + draw->flushing = FALSE; + } } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c7ac32b452..c5c3d3b09e 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -42,37 +42,11 @@ struct pipe_context; -struct vertex_buffer; -struct vertex_info; struct draw_context; struct draw_stage; struct draw_vertex_shader; -/** - * Clipmask flags - */ -/*@{*/ -#define CLIP_RIGHT_BIT 0x01 -#define CLIP_LEFT_BIT 0x02 -#define CLIP_TOP_BIT 0x04 -#define CLIP_BOTTOM_BIT 0x08 -#define CLIP_NEAR_BIT 0x10 -#define CLIP_FAR_BIT 0x20 -/*@}*/ - -/** - * Bitshift for each clip flag - */ -/*@{*/ -#define CLIP_RIGHT_SHIFT 0 -#define CLIP_LEFT_SHIFT 1 -#define CLIP_TOP_SHIFT 2 -#define CLIP_BOTTOM_SHIFT 3 -#define CLIP_NEAR_SHIFT 4 -#define CLIP_FAR_SHIFT 5 -/*@}*/ - struct draw_context *draw_create( void ); @@ -99,15 +73,13 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable); void draw_enable_point_sprites(struct draw_context *draw, boolean enable); -boolean draw_use_sse(struct draw_context *draw); - -void +boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); -void +boolean draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); -void +boolean draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); @@ -168,17 +140,24 @@ void draw_arrays(struct draw_context *draw, unsigned prim, void draw_flush(struct draw_context *draw); -/*********************************************************************** - * draw_debug.c + +/******************************************************************************* + * Driver backend interface */ -boolean draw_validate_prim( unsigned prim, unsigned length ); -unsigned draw_trim_prim( unsigned mode, unsigned count ); +struct vbuf_render; +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ); +void draw_set_driver_clipping( struct draw_context *draw, + boolean bypass_clipping ); +/******************************************************************************* + * Draw pipeline + */ +boolean draw_need_pipeline(const struct draw_context *draw, + const struct pipe_rasterizer_state *rasterizer, + unsigned prim ); -struct vbuf_render; -void draw_set_render( struct draw_context *draw, - struct vbuf_render *render ); #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c new file mode 100644 index 0000000000..46afb0f41f --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" +#include "draw/draw_private.h" +#include "draw/draw_pipe.h" + + + +boolean draw_pipeline_init( struct draw_context *draw ) +{ + /* create pipeline stages */ + draw->pipeline.wide_line = draw_wide_line_stage( draw ); + draw->pipeline.wide_point = draw_wide_point_stage( draw ); + draw->pipeline.stipple = draw_stipple_stage( draw ); + draw->pipeline.unfilled = draw_unfilled_stage( draw ); + draw->pipeline.twoside = draw_twoside_stage( draw ); + draw->pipeline.offset = draw_offset_stage( draw ); + draw->pipeline.clip = draw_clip_stage( draw ); + draw->pipeline.flatshade = draw_flatshade_stage( draw ); + draw->pipeline.cull = draw_cull_stage( draw ); + draw->pipeline.validate = draw_validate_stage( draw ); + draw->pipeline.first = draw->pipeline.validate; + + if (!draw->pipeline.wide_line || + !draw->pipeline.wide_point || + !draw->pipeline.stipple || + !draw->pipeline.unfilled || + !draw->pipeline.twoside || + !draw->pipeline.offset || + !draw->pipeline.clip || + !draw->pipeline.flatshade || + !draw->pipeline.cull || + !draw->pipeline.validate) + return FALSE; + + /* these defaults are oriented toward the needs of softpipe */ + draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */ + draw->pipeline.wide_line_threshold = 1.0; + draw->pipeline.line_stipple = TRUE; + draw->pipeline.point_sprite = TRUE; + + return TRUE; +} + + +void draw_pipeline_destroy( struct draw_context *draw ) +{ + if (draw->pipeline.wide_line) + draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); + if (draw->pipeline.wide_point) + draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); + if (draw->pipeline.stipple) + draw->pipeline.stipple->destroy( draw->pipeline.stipple ); + if (draw->pipeline.unfilled) + draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); + if (draw->pipeline.twoside) + draw->pipeline.twoside->destroy( draw->pipeline.twoside ); + if (draw->pipeline.offset) + draw->pipeline.offset->destroy( draw->pipeline.offset ); + if (draw->pipeline.clip) + draw->pipeline.clip->destroy( draw->pipeline.clip ); + if (draw->pipeline.flatshade) + draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); + if (draw->pipeline.cull) + draw->pipeline.cull->destroy( draw->pipeline.cull ); + if (draw->pipeline.validate) + draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.aaline) + draw->pipeline.aaline->destroy( draw->pipeline.aaline ); + if (draw->pipeline.aapoint) + draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); + if (draw->pipeline.pstipple) + draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); + if (draw->pipeline.rasterize) + draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); +} + + + + + + + +static void do_point( struct draw_context *draw, + const char *v0 ) +{ + struct prim_header prim; + + prim.flags = 0; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + + draw->pipeline.first->point( draw->pipeline.first, &prim ); +} + + +static void do_line( struct draw_context *draw, + ushort flags, + const char *v0, + const char *v1 ) +{ + struct prim_header prim; + + prim.flags = flags; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + + draw->pipeline.first->line( draw->pipeline.first, &prim ); +} + + +static void do_triangle( struct draw_context *draw, + ushort flags, + char *v0, + char *v1, + char *v2 ) +{ + struct prim_header prim; + + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + prim.v[2] = (struct vertex_header *)v2; + prim.flags = flags; + prim.pad = 0; + + draw->pipeline.first->tri( draw->pipeline.first, &prim ); +} + + + + +/* Code to run the pipeline on a fairly arbitary collection of vertices. + * + * Vertex headers must be pre-initialized with the + * UNDEFINED_VERTEX_ID, this code will cause that id to become + * overwritten, so it may have to be reset if there is the intention + * to reuse the vertices. + * + * This code provides a callback to reset the vertex id's which the + * draw_vbuf.c code uses when it has to perform a flush. + */ +void draw_pipeline_run( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ) +{ + char *verts = (char *)vertices; + unsigned i; + + draw->pipeline.verts = verts; + draw->pipeline.vertex_stride = stride; + draw->pipeline.vertex_count = vertex_count; + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) + do_point( draw, + verts + stride * elts[i] ); + break; + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) + do_line( draw, + elts[i+0], /* flags */ + verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK), + verts + stride * elts[i+1]); + break; + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) + do_triangle( draw, + elts[i+0], /* flags */ + verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK), + verts + stride * elts[i+1], + verts + stride * elts[i+2]); + break; + } + + draw->pipeline.verts = NULL; + draw->pipeline.vertex_count = 0; +} + + + +void draw_pipeline_flush( struct draw_context *draw, + unsigned flags ) +{ + draw->pipeline.first->flush( draw->pipeline.first, flags ); + draw->pipeline.first = draw->pipeline.validate; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h new file mode 100644 index 0000000000..f1cb0891ca --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -0,0 +1,125 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef DRAW_PIPE_H +#define DRAW_PIPE_H + +#include "pipe/p_compiler.h" +#include "draw_private.h" /* for sizeof(vertex_header) */ + + +/** + * Basic info for a point/line/triangle primitive. + */ +struct prim_header { + float det; /**< front/back face determinant */ + ushort flags; + ushort pad; + struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */ +}; + + + +/** + * Base class for all primitive drawing stages. + */ +struct draw_stage +{ + struct draw_context *draw; /**< parent context */ + + struct draw_stage *next; /**< next stage in pipeline */ + + struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ + unsigned nr_tmps; + + void (*point)( struct draw_stage *, + struct prim_header * ); + + void (*line)( struct draw_stage *, + struct prim_header * ); + + void (*tri)( struct draw_stage *, + struct prim_header * ); + + void (*flush)( struct draw_stage *, + unsigned flags ); + + void (*reset_stipple_counter)( struct draw_stage * ); + + void (*destroy)( struct draw_stage * ); +}; + + +extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); +extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); +extern struct draw_stage *draw_offset_stage( struct draw_context *context ); +extern struct draw_stage *draw_clip_stage( struct draw_context *context ); +extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); +extern struct draw_stage *draw_cull_stage( struct draw_context *context ); +extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); +extern struct draw_stage *draw_validate_stage( struct draw_context *context ); + + +extern void draw_free_temp_verts( struct draw_stage *stage ); +extern boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); + +extern void draw_reset_vertex_ids( struct draw_context *draw ); + +void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header); +void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header); +void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header); + + + +/** + * Get a writeable copy of a vertex. + * \param stage drawing stage info + * \param vert the vertex to copy (source) + * \param idx index into stage's tmp[] array to put the copy (dest) + * \return pointer to the copied vertex + */ +static INLINE struct vertex_header * +dup_vert( struct draw_stage *stage, + const struct vertex_header *vert, + unsigned idx ) +{ + struct vertex_header *tmp = stage->tmp[idx]; + const uint vsize = sizeof(struct vertex_header) + + stage->draw->num_vs_outputs * 4 * sizeof(float); + memcpy(tmp, vert, vsize); + tmp->vertex_id = UNDEFINED_VERTEX_ID; + return tmp; +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index e8d2a45102..346a96e37f 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -43,6 +43,7 @@ #include "draw_context.h" #include "draw_private.h" +#include "draw_pipe.h" /** @@ -333,11 +334,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx, * Generate the frag shader we'll use for drawing AA lines. * This will be the user's shader plus some texture/modulate instructions. */ -static void +static boolean generate_aaline_fs(struct aaline_stage *aaline) { const struct pipe_shader_state *orig_fs = &aaline->fs->state; - //struct draw_context *draw = aaline->stage.draw; struct pipe_shader_state aaline_fs; struct aa_transform_context transform; @@ -345,6 +345,8 @@ generate_aaline_fs(struct aaline_stage *aaline) aaline_fs = *orig_fs; /* copy to init */ aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (aaline_fs.tokens == NULL) + return FALSE; memset(&transform, 0, sizeof(transform)); transform.colorOutput = -1; @@ -369,15 +371,18 @@ generate_aaline_fs(struct aaline_stage *aaline) aaline->fs->aaline_fs = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); + if (aaline->fs->aaline_fs == NULL) + return FALSE; aaline->fs->generic_attrib = transform.maxGeneric + 1; + return TRUE; } /** * Create the texture map we'll use for antialiasing the lines. */ -static void +static boolean aaline_create_texture(struct aaline_stage *aaline) { struct pipe_context *pipe = aaline->pipe; @@ -387,7 +392,7 @@ aaline_create_texture(struct aaline_stage *aaline) memset(&texTemp, 0, sizeof(texTemp)); texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = MAX_TEXTURE_LEVEL; texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; @@ -395,6 +400,8 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.cpp = 1; aaline->texture = screen->texture_create(screen, &texTemp); + if (!aaline->texture) + return FALSE; /* Fill in mipmap images. * Basically each level is solid opaque, except for the outermost @@ -410,6 +417,8 @@ aaline_create_texture(struct aaline_stage *aaline) surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); data = pipe_surface_map(surface); + if (data == NULL) + return FALSE; for (i = 0; i < size; i++) { for (j = 0; j < size; j++) { @@ -435,6 +444,7 @@ aaline_create_texture(struct aaline_stage *aaline) pipe_surface_reference(&surface, NULL); pipe->texture_update(pipe, aaline->texture, 0, (1 << level)); } + return TRUE; } @@ -443,7 +453,7 @@ aaline_create_texture(struct aaline_stage *aaline) * By using a mipmapped texture, we don't have to generate a different * texture image for each line size. */ -static void +static boolean aaline_create_sampler(struct aaline_stage *aaline) { struct pipe_sampler_state sampler; @@ -461,6 +471,10 @@ aaline_create_sampler(struct aaline_stage *aaline) sampler.max_lod = MAX_TEXTURE_LEVEL; aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); + if (aaline->sampler_cso == NULL) + return FALSE; + + return TRUE; } @@ -468,13 +482,20 @@ aaline_create_sampler(struct aaline_stage *aaline) * When we're about to draw our first AA line in a batch, this function is * called to tell the driver to bind our modified fragment shader. */ -static void +static boolean bind_aaline_fragment_shader(struct aaline_stage *aaline) { - if (!aaline->fs->aaline_fs) { - generate_aaline_fs(aaline); - } + struct draw_context *draw = aaline->stage.draw; + + if (!aaline->fs->aaline_fs && + !generate_aaline_fs(aaline)) + return FALSE; + + draw->suspend_flushing = TRUE; aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); + draw->suspend_flushing = FALSE; + + return TRUE; } @@ -486,20 +507,6 @@ aaline_stage( struct draw_stage *stage ) } -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} - - /** * Draw a wide line by drawing a quad, using geometry which will * fullfill GL's antialiased line requirements. @@ -637,7 +644,11 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) /* * Bind (generate) our fragprog, sampler and texture */ - bind_aaline_fragment_shader(aaline); + if (!bind_aaline_fragment_shader(aaline)) { + stage->line = draw_pipe_passthrough_line; + stage->line(stage, header); + return; + } /* update vertex attrib info */ aaline->tex_slot = draw->num_vs_outputs; @@ -657,8 +668,10 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit], aaline->texture); + draw->suspend_flushing = TRUE; aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler); aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture); + draw->suspend_flushing = FALSE; /* now really draw first line */ stage->line = aaline_line; @@ -676,14 +689,14 @@ aaline_flush(struct draw_stage *stage, unsigned flags) stage->line = aaline_first_line; stage->next->flush( stage->next, flags ); - /* restore original frag shader */ + /* restore original frag shader, texture, sampler state */ + draw->suspend_flushing = TRUE; aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); - - /* XXX restore original texture, sampler state */ aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, aaline->state.sampler); aaline->driver_set_sampler_textures(pipe, aaline->num_textures, aaline->state.texture); + draw->suspend_flushing = FALSE; draw->extra_vp_outputs.slot = 0; } @@ -700,10 +713,17 @@ static void aaline_destroy(struct draw_stage *stage) { struct aaline_stage *aaline = aaline_stage(stage); + uint i; + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&aaline->state.texture[i], NULL); + } - aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); + if (aaline->sampler_cso) + aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); - pipe_texture_release(&aaline->texture); + if (aaline->texture) + pipe_texture_release(&aaline->texture); draw_free_temp_verts( stage ); @@ -715,19 +735,28 @@ static struct aaline_stage * draw_aaline_stage(struct draw_context *draw) { struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); + if (aaline == NULL) + return NULL; - draw_alloc_temp_verts( &aaline->stage, 8 ); + if (!draw_alloc_temp_verts( &aaline->stage, 8 )) + goto fail; aaline->stage.draw = draw; aaline->stage.next = NULL; - aaline->stage.point = passthrough_point; + aaline->stage.point = draw_pipe_passthrough_point; aaline->stage.line = aaline_first_line; - aaline->stage.tri = passthrough_tri; + aaline->stage.tri = draw_pipe_passthrough_tri; aaline->stage.flush = aaline_flush; aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; aaline->stage.destroy = aaline_destroy; return aaline; + + fail: + if (aaline) + aaline_destroy(&aaline->stage); + + return NULL; } @@ -749,13 +778,13 @@ aaline_create_fs_state(struct pipe_context *pipe, { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); + if (aafs == NULL) + return NULL; - if (aafs) { - aafs->state = *fs; + aafs->state = *fs; - /* pass-through */ - aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); - } + /* pass-through */ + aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); return aafs; } @@ -766,6 +795,7 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* save current */ aaline->fs = aafs; /* pass-through */ @@ -790,9 +820,11 @@ aaline_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); aaline->num_samplers = num; + /* pass-through */ aaline->driver_bind_sampler_states(aaline->pipe, num, sampler); } @@ -809,6 +841,9 @@ aaline_set_sampler_textures(struct pipe_context *pipe, for (i = 0; i < num; i++) { pipe_texture_reference(&aaline->state.texture[i], texture[i]); } + for ( ; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&aaline->state.texture[i], NULL); + } aaline->num_textures = num; /* pass-through */ @@ -821,7 +856,7 @@ aaline_set_sampler_textures(struct pipe_context *pipe, * into the draw module's pipeline. This will not be used if the * hardware has native support for AA lines. */ -void +boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) { struct aaline_stage *aaline; @@ -832,14 +867,17 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) * Create / install AA line drawing / prim stage */ aaline = draw_aaline_stage( draw ); - assert(aaline); - draw->pipeline.aaline = &aaline->stage; + if (!aaline) + goto fail; aaline->pipe = pipe; /* create special texture, sampler state */ - aaline_create_texture(aaline); - aaline_create_sampler(aaline); + if (!aaline_create_texture(aaline)) + goto fail; + + if (!aaline_create_sampler(aaline)) + goto fail; /* save original driver functions */ aaline->driver_create_fs_state = pipe->create_fs_state; @@ -856,4 +894,16 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) pipe->bind_sampler_states = aaline_bind_sampler_states; pipe->set_sampler_textures = aaline_set_sampler_textures; + + /* Install once everything is known to be OK: + */ + draw->pipeline.aaline = &aaline->stage; + + return TRUE; + + fail: + if (aaline) + aaline->stage.destroy( &aaline->stage ); + + return FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index fcebe3e7a0..122a48660a 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -48,7 +48,8 @@ #include "tgsi/util/tgsi_dump.h" #include "draw_context.h" -#include "draw_private.h" +#include "draw_vs.h" +#include "draw_pipe.h" /* @@ -480,10 +481,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /** - * Generate the frag shader we'll use for drawing AA lines. + * Generate the frag shader we'll use for drawing AA points. * This will be the user's shader plus some texture/modulate instructions. */ -static void +static boolean generate_aapoint_fs(struct aapoint_stage *aapoint) { const struct pipe_shader_state *orig_fs = &aapoint->fs->state; @@ -494,6 +495,8 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) aapoint_fs = *orig_fs; /* copy to init */ aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (aapoint_fs.tokens == NULL) + return FALSE; memset(&transform, 0, sizeof(transform)); transform.colorOutput = -1; @@ -518,22 +521,33 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); + if (aapoint->fs->aapoint_fs == NULL) + return FALSE; aapoint->fs->generic_attrib = transform.maxGeneric + 1; + + return TRUE; } /** - * When we're about to draw our first AA line in a batch, this function is + * When we're about to draw our first AA point in a batch, this function is * called to tell the driver to bind our modified fragment shader. */ -static void +static boolean bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) { - if (!aapoint->fs->aapoint_fs) { - generate_aapoint_fs(aapoint); - } + struct draw_context *draw = aapoint->stage.draw; + + if (!aapoint->fs->aapoint_fs && + !generate_aapoint_fs(aapoint)) + return FALSE; + + draw->suspend_flushing = TRUE; aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); + draw->suspend_flushing = FALSE; + + return TRUE; } @@ -545,18 +559,6 @@ aapoint_stage( struct draw_stage *stage ) } -static void -passthrough_line(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->line(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} /** @@ -700,7 +702,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) } } - /* now really draw first line */ + /* now really draw first point */ stage->point = aapoint_point; stage->point(stage, header); } @@ -717,7 +719,9 @@ aapoint_flush(struct draw_stage *stage, unsigned flags) stage->next->flush( stage->next, flags ); /* restore original frag shader */ + draw->suspend_flushing = TRUE; aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); + draw->suspend_flushing = FALSE; draw->extra_vp_outputs.slot = 0; } @@ -742,19 +746,29 @@ static struct aapoint_stage * draw_aapoint_stage(struct draw_context *draw) { struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); + if (aapoint == NULL) + goto fail; - draw_alloc_temp_verts( &aapoint->stage, 4 ); + if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) + goto fail; aapoint->stage.draw = draw; aapoint->stage.next = NULL; aapoint->stage.point = aapoint_first_point; - aapoint->stage.line = passthrough_line; - aapoint->stage.tri = passthrough_tri; + aapoint->stage.line = draw_pipe_passthrough_line; + aapoint->stage.tri = draw_pipe_passthrough_tri; aapoint->stage.flush = aapoint_flush; aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; aapoint->stage.destroy = aapoint_destroy; return aapoint; + + fail: + if (aapoint) + aapoint_destroy(&aapoint->stage); + + return NULL; + } @@ -776,13 +790,13 @@ aapoint_create_fs_state(struct pipe_context *pipe, { struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); + if (aafs == NULL) + return NULL; - if (aafs) { - aafs->state = *fs; + aafs->state = *fs; - /* pass-through */ - aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); - } + /* pass-through */ + aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); return aafs; } @@ -817,7 +831,7 @@ aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) * into the draw module's pipeline. This will not be used if the * hardware has native support for AA points. */ -void +boolean draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe) { @@ -829,8 +843,8 @@ draw_install_aapoint_stage(struct draw_context *draw, * Create / install AA point drawing / prim stage */ aapoint = draw_aapoint_stage( draw ); - assert(aapoint); - draw->pipeline.aapoint = &aapoint->stage; + if (aapoint == NULL) + goto fail; aapoint->pipe = pipe; @@ -843,4 +857,14 @@ draw_install_aapoint_stage(struct draw_context *draw, pipe->create_fs_state = aapoint_create_fs_state; pipe->bind_fs_state = aapoint_bind_fs_state; pipe->delete_fs_state = aapoint_delete_fs_state; + + draw->pipeline.aapoint = &aapoint->stage; + + return TRUE; + + fail: + if (aapoint) + aapoint->stage.destroy( &aapoint->stage ); + + return FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index e24c5d8032..ce80c94163 100644 --- a/src/gallium/auxiliary/draw/draw_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -35,8 +35,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "draw_context.h" -#include "draw_private.h" +#include "draw_vs.h" +#include "draw_pipe.h" #ifndef IS_NEGATIVE @@ -119,7 +119,7 @@ static void interp( const struct clipper *clip, */ { dst->clipmask = 0; - dst->edgeflag = 0; + dst->edgeflag = 0; /* will get overwritten later */ dst->pad = 0; dst->vertex_id = UNDEFINED_VERTEX_ID; } @@ -162,49 +162,50 @@ static void emit_poly( struct draw_stage *stage, struct prim_header header; unsigned i; + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; + /* later stages may need the determinant, but only the sign matters */ header.det = origPrim->det; + header.flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + header.pad = 0; - for (i = 2; i < n; i++) { + for (i = 2; i < n; i++, header.flags = 0) { header.v[0] = inlist[i-1]; header.v[1] = inlist[i]; header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ - - { - unsigned tmp1 = header.v[1]->edgeflag; - unsigned tmp2 = header.v[2]->edgeflag; - - if (i != n-1) header.v[1]->edgeflag = 0; - if (i != 2) header.v[2]->edgeflag = 0; - - header.edgeflags = ((header.v[0]->edgeflag << 0) | - (header.v[1]->edgeflag << 1) | - (header.v[2]->edgeflag << 2)); - - if (0) { - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; - uint j, k; - debug_printf("Clipped tri:\n"); - for (j = 0; j < 3; j++) { - for (k = 0; k < vs->info.num_outputs; k++) { - debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, - header.v[j]->data[k][0], - header.v[j]->data[k][1], - header.v[j]->data[k][2], - header.v[j]->data[k][3]); - } + + if (i == n-1) + header.flags |= edge_last; + + if (0) { + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + uint j, k; + debug_printf("Clipped tri:\n"); + for (j = 0; j < 3; j++) { + for (k = 0; k < vs->info.num_outputs; k++) { + debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, + header.v[j]->data[k][0], + header.v[j]->data[k][1], + header.v[j]->data[k][2], + header.v[j]->data[k][3]); } } - - stage->next->tri( stage->next, &header ); - - header.v[1]->edgeflag = tmp1; - header.v[2]->edgeflag = tmp2; } + + stage->next->tri( stage->next, &header ); } } - +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} /* Clip a triangle against the viewport and user clip planes. @@ -486,8 +487,11 @@ static void clip_destroy( struct draw_stage *stage ) struct draw_stage *draw_clip_stage( struct draw_context *draw ) { struct clipper *clipper = CALLOC_STRUCT(clipper); + if (clipper == NULL) + goto fail; - draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ); + if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) + goto fail; clipper->stage.draw = draw; clipper->stage.point = clip_point; @@ -500,4 +504,10 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) clipper->plane = draw->plane; return &clipper->stage; + + fail: + if (clipper) + clipper->stage.destroy( &clipper->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 8177b0ac86..87aaf1f85b 100644 --- a/src/gallium/auxiliary/draw/draw_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -35,7 +35,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" -#include "draw_private.h" +#include "draw_pipe.h" struct cull_stage { @@ -95,20 +95,6 @@ static void cull_first_tri( struct draw_stage *stage, -static void cull_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void cull_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - static void cull_flush( struct draw_stage *stage, unsigned flags ) { stage->tri = cull_first_tri; @@ -134,17 +120,26 @@ static void cull_destroy( struct draw_stage *stage ) struct draw_stage *draw_cull_stage( struct draw_context *draw ) { struct cull_stage *cull = CALLOC_STRUCT(cull_stage); + if (cull == NULL) + goto fail; - draw_alloc_temp_verts( &cull->stage, 0 ); + if (!draw_alloc_temp_verts( &cull->stage, 0 )) + goto fail; cull->stage.draw = draw; cull->stage.next = NULL; - cull->stage.point = cull_point; - cull->stage.line = cull_line; + cull->stage.point = draw_pipe_passthrough_point; + cull->stage.line = draw_pipe_passthrough_line; cull->stage.tri = cull_first_tri; cull->stage.flush = cull_flush; cull->stage.reset_stipple_counter = cull_reset_stipple_counter; cull->stage.destroy = cull_destroy; return &cull->stage; + + fail: + if (cull) + cull->stage.destroy( &cull->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index af2cb05c98..09b68c4559 100644 --- a/src/gallium/auxiliary/draw/draw_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -30,7 +30,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_vs.h" +#include "draw_pipe.h" /** subclass of draw_stage */ @@ -90,7 +91,8 @@ static void flatshade_tri_0( struct draw_stage *stage, struct prim_header tmp; tmp.det = header->det; - tmp.edgeflags = header->edgeflags; + tmp.flags = header->flags; + tmp.pad = header->pad; tmp.v[0] = header->v[0]; tmp.v[1] = dup_vert(stage, header->v[1], 0); tmp.v[2] = dup_vert(stage, header->v[2], 1); @@ -107,7 +109,8 @@ static void flatshade_tri_2( struct draw_stage *stage, struct prim_header tmp; tmp.det = header->det; - tmp.edgeflags = header->edgeflags; + tmp.flags = header->flags; + tmp.pad = header->pad; tmp.v[0] = dup_vert(stage, header->v[0], 0); tmp.v[1] = dup_vert(stage, header->v[1], 1); tmp.v[2] = header->v[2]; @@ -151,13 +154,6 @@ static void flatshade_line_1( struct draw_stage *stage, } -/* Flatshade point -- passthrough. - */ -static void flatshade_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} static void flatshade_init_state( struct draw_stage *stage ) @@ -230,12 +226,15 @@ static void flatshade_destroy( struct draw_stage *stage ) struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) { struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage); + if (flatshade == NULL) + goto fail; - draw_alloc_temp_verts( &flatshade->stage, 2 ); + if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) + goto fail; flatshade->stage.draw = draw; flatshade->stage.next = NULL; - flatshade->stage.point = flatshade_point; + flatshade->stage.point = draw_pipe_passthrough_point; flatshade->stage.line = flatshade_first_line; flatshade->stage.tri = flatshade_first_tri; flatshade->stage.flush = flatshade_flush; @@ -243,6 +242,12 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) flatshade->stage.destroy = flatshade_destroy; return &flatshade->stage; + + fail: + if (flatshade) + flatshade->stage.destroy( &flatshade->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index dbc676deae..ea6de8c571 100644 --- a/src/gallium/auxiliary/draw/draw_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -33,7 +33,7 @@ */ #include "pipe/p_util.h" -#include "draw_private.h" +#include "draw_pipe.h" @@ -106,7 +106,8 @@ static void offset_tri( struct draw_stage *stage, struct prim_header tmp; tmp.det = header->det; - tmp.edgeflags = header->edgeflags; + tmp.flags = header->flags; + tmp.pad = header->pad; tmp.v[0] = dup_vert(stage, header->v[0], 0); tmp.v[1] = dup_vert(stage, header->v[1], 1); tmp.v[2] = dup_vert(stage, header->v[2], 2); @@ -129,18 +130,6 @@ static void offset_first_tri( struct draw_stage *stage, } -static void offset_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void offset_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} static void offset_flush( struct draw_stage *stage, @@ -170,17 +159,25 @@ static void offset_destroy( struct draw_stage *stage ) struct draw_stage *draw_offset_stage( struct draw_context *draw ) { struct offset_stage *offset = CALLOC_STRUCT(offset_stage); + if (offset == NULL) + goto fail; draw_alloc_temp_verts( &offset->stage, 3 ); offset->stage.draw = draw; offset->stage.next = NULL; - offset->stage.point = offset_point; - offset->stage.line = offset_line; + offset->stage.point = draw_pipe_passthrough_point; + offset->stage.line = draw_pipe_passthrough_line; offset->stage.tri = offset_first_tri; offset->stage.flush = offset_flush; offset->stage.reset_stipple_counter = offset_reset_stipple_counter; offset->stage.destroy = offset_destroy; return &offset->stage; + + fail: + if (offset) + offset->stage.destroy( &offset->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 4dddb72906..2dfd1800d5 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -44,7 +44,7 @@ #include "tgsi/util/tgsi_dump.h" #include "draw_context.h" -#include "draw_private.h" +#include "draw_pipe.h" @@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, uint size = 4; immed = tgsi_default_full_immediate(); immed.Immediate.Size = 1 + size; /* one for the token itself */ - immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value; + immed.u.Pointer = (void *) value; ctx->emit_immediate(ctx, &immed); } @@ -320,7 +320,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, * Generate the frag shader we'll use for doing polygon stipple. * This will be the user's shader prefixed with a TEX and KIL instruction. */ -static void +static boolean generate_pstip_fs(struct pstip_stage *pstip) { const struct pipe_shader_state *orig_fs = &pstip->fs->state; @@ -332,6 +332,8 @@ generate_pstip_fs(struct pstip_stage *pstip) pstip_fs = *orig_fs; /* copy to init */ pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (pstip_fs.tokens == NULL) + return FALSE; memset(&transform, 0, sizeof(transform)); transform.wincoordInput = -1; @@ -355,6 +357,8 @@ generate_pstip_fs(struct pstip_stage *pstip) assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); + + return TRUE; } @@ -404,7 +408,7 @@ pstip_update_texture(struct pstip_stage *pstip) /** * Create the texture map we'll use for stippling. */ -static void +static boolean pstip_create_texture(struct pstip_stage *pstip) { struct pipe_context *pipe = pstip->pipe; @@ -413,7 +417,7 @@ pstip_create_texture(struct pstip_stage *pstip) memset(&texTemp, 0, sizeof(texTemp)); texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = 0; texTemp.width[0] = 32; texTemp.height[0] = 32; @@ -421,16 +425,17 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.cpp = 1; pstip->texture = screen->texture_create(screen, &texTemp); - assert(pstip->texture->refcount == 1); + if (pstip->texture == NULL) + return FALSE; + + return TRUE; } /** - * Create the sampler CSO that'll be used for antialiasing. - * By using a mipmapped texture, we don't have to generate a different - * texture image for each line size. + * Create the sampler CSO that'll be used for stippling. */ -static void +static boolean pstip_create_sampler(struct pstip_stage *pstip) { struct pipe_sampler_state sampler; @@ -448,24 +453,32 @@ pstip_create_sampler(struct pstip_stage *pstip) sampler.max_lod = 0.0f; pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); + if (pstip->sampler_cso == NULL) + return FALSE; + + return TRUE; } /** - * When we're about to draw our first AA line in a batch, this function is - * called to tell the driver to bind our modified fragment shader. + * When we're about to draw our first stipple polygon in a batch, this function + * is called to tell the driver to bind our modified fragment shader. */ -static void +static boolean bind_pstip_fragment_shader(struct pstip_stage *pstip) { - if (!pstip->fs->pstip_fs) { - generate_pstip_fs(pstip); - } + struct draw_context *draw = pstip->stage.draw; + if (!pstip->fs->pstip_fs && + !generate_pstip_fs(pstip)) + return FALSE; + + draw->suspend_flushing = TRUE; pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); + draw->suspend_flushing = FALSE; + return TRUE; } - static INLINE struct pstip_stage * pstip_stage( struct draw_stage *stage ) { @@ -474,38 +487,22 @@ pstip_stage( struct draw_stage *stage ) static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point(stage->next, header); -} - - -static void -passthrough_line(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->line(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} - - - -static void pstip_first_tri(struct draw_stage *stage, struct prim_header *header) { struct pstip_stage *pstip = pstip_stage(stage); struct pipe_context *pipe = pstip->pipe; + struct draw_context *draw = stage->draw; uint num_samplers; assert(stage->draw->rasterizer->poly_stipple_enable); /* bind our fragprog */ - bind_pstip_fragment_shader(pstip); + if (!bind_pstip_fragment_shader(pstip)) { + stage->tri = draw_pipe_passthrough_tri; + stage->tri(stage, header); + return; + } + /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ @@ -519,11 +516,13 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) assert(num_samplers <= PIPE_MAX_SAMPLERS); + draw->suspend_flushing = TRUE; pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); + draw->suspend_flushing = FALSE; - /* now really draw first line */ - stage->tri = passthrough_tri; + /* now really draw first triangle */ + stage->tri = draw_pipe_passthrough_tri; stage->tri(stage, header); } @@ -531,21 +530,21 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) static void pstip_flush(struct draw_stage *stage, unsigned flags) { - /*struct draw_context *draw = stage->draw;*/ + struct draw_context *draw = stage->draw; struct pstip_stage *pstip = pstip_stage(stage); struct pipe_context *pipe = pstip->pipe; stage->tri = pstip_first_tri; stage->next->flush( stage->next, flags ); - /* restore original frag shader */ + /* restore original frag shader, texture, sampler state */ + draw->suspend_flushing = TRUE; pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); - - /* XXX restore original texture, sampler state */ pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, pstip->state.samplers); pstip->driver_set_sampler_textures(pipe, pstip->num_textures, pstip->state.textures); + draw->suspend_flushing = FALSE; } @@ -560,6 +559,11 @@ static void pstip_destroy(struct draw_stage *stage) { struct pstip_stage *pstip = pstip_stage(stage); + uint i; + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&pstip->state.textures[i], NULL); + } pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso); @@ -579,8 +583,8 @@ draw_pstip_stage(struct draw_context *draw) pstip->stage.draw = draw; pstip->stage.next = NULL; - pstip->stage.point = passthrough_point; - pstip->stage.line = passthrough_line; + pstip->stage.point = draw_pipe_passthrough_point; + pstip->stage.line = draw_pipe_passthrough_line; pstip->stage.tri = pstip_first_tri; pstip->stage.flush = pstip_flush; pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; @@ -690,8 +694,10 @@ pstip_set_polygon_stipple(struct pipe_context *pipe, const struct pipe_poly_stipple *stipple) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ pstip->state.stipple = stipple; + /* pass-through */ pstip->driver_set_polygon_stipple(pstip->pipe, stipple); @@ -699,13 +705,12 @@ pstip_set_polygon_stipple(struct pipe_context *pipe, } - /** - * Called by drivers that want to install this AA line prim stage + * Called by drivers that want to install this polygon stipple stage * into the draw module's pipeline. This will not be used if the - * hardware has native support for AA lines. + * hardware has native support for polygon stipple. */ -void +boolean draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe) { @@ -714,17 +719,22 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->draw = (void *) draw; /* - * Create / install AA line drawing / prim stage + * Create / install pgon stipple drawing / prim stage */ pstip = draw_pstip_stage( draw ); - assert(pstip); + if (pstip == NULL) + goto fail; + draw->pipeline.pstipple = &pstip->stage; pstip->pipe = pipe; /* create special texture, sampler state */ - pstip_create_texture(pstip); - pstip_create_sampler(pstip); + if (!pstip_create_texture(pstip)) + goto fail; + + if (!pstip_create_sampler(pstip)) + goto fail; /* save original driver functions */ pstip->driver_create_fs_state = pipe->create_fs_state; @@ -743,4 +753,12 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_sampler_states = pstip_bind_sampler_states; pipe->set_sampler_textures = pstip_set_sampler_textures; pipe->set_polygon_stipple = pstip_set_polygon_stipple; + + return TRUE; + + fail: + if (pstip) + pstip->stage.destroy( &pstip->stage ); + + return FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 506f33512c..3cbced362e 100644 --- a/src/gallium/auxiliary/draw/draw_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -39,7 +39,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_pipe.h" /** Subclass of draw_stage */ @@ -135,6 +135,10 @@ stipple_line(struct draw_stage *stage, struct prim_header *header) float length = MAX2(dx, dy); int i; + if (header->flags & DRAW_PIPE_RESET_STIPPLE) + stipple->counter = 0; + + /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table. */ for (i = 0; i < length; i++) { @@ -195,18 +199,6 @@ stipple_flush(struct draw_stage *stage, unsigned flags) } -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point( stage->next, header ); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} static void @@ -228,9 +220,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw ) stipple->stage.draw = draw; stipple->stage.next = NULL; - stipple->stage.point = passthrough_point; + stipple->stage.point = draw_pipe_passthrough_point; stipple->stage.line = stipple_first_line; - stipple->stage.tri = passthrough_tri; + stipple->stage.tri = draw_pipe_passthrough_tri; stipple->stage.reset_stipple_counter = reset_stipple_counter; stipple->stage.flush = stipple_flush; stipple->stage.destroy = stipple_destroy; diff --git a/src/gallium/auxiliary/draw/draw_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 3debaac282..50872fdbe9 100644 --- a/src/gallium/auxiliary/draw/draw_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -31,8 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" - +#include "draw_vs.h" +#include "draw_pipe.h" struct twoside_stage { struct draw_stage stage; @@ -85,7 +85,8 @@ static void twoside_tri( struct draw_stage *stage, struct prim_header tmp; tmp.det = header->det; - tmp.edgeflags = header->edgeflags; + tmp.flags = header->flags; + tmp.pad = header->pad; /* copy back attribs to front attribs */ tmp.v[0] = copy_bfc(twoside, header->v[0], 0); tmp.v[1] = copy_bfc(twoside, header->v[1], 1); @@ -99,21 +100,6 @@ static void twoside_tri( struct draw_stage *stage, } -static void twoside_line( struct draw_stage *stage, - struct prim_header *header ) -{ - /* pass-through */ - stage->next->line( stage->next, header ); -} - - -static void twoside_point( struct draw_stage *stage, - struct prim_header *header ) -{ - /* pass-through */ - stage->next->point( stage->next, header ); -} - static void twoside_first_tri( struct draw_stage *stage, struct prim_header *header ) @@ -187,17 +173,26 @@ static void twoside_destroy( struct draw_stage *stage ) struct draw_stage *draw_twoside_stage( struct draw_context *draw ) { struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage); + if (twoside == NULL) + goto fail; - draw_alloc_temp_verts( &twoside->stage, 3 ); + if (!draw_alloc_temp_verts( &twoside->stage, 3 )) + goto fail; twoside->stage.draw = draw; twoside->stage.next = NULL; - twoside->stage.point = twoside_point; - twoside->stage.line = twoside_line; + twoside->stage.point = draw_pipe_passthrough_point; + twoside->stage.line = draw_pipe_passthrough_line; twoside->stage.tri = twoside_first_tri; twoside->stage.flush = twoside_flush; twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; twoside->stage.destroy = twoside_destroy; return &twoside->stage; + + fail: + if (twoside) + twoside->stage.destroy( &twoside->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index b07860cd9e..8f97fdedaa 100644 --- a/src/gallium/auxiliary/draw/draw_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -36,6 +36,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "draw_private.h" +#include "draw_pipe.h" struct unfilled_stage { @@ -82,9 +83,9 @@ static void points( struct draw_stage *stage, struct vertex_header *v1 = header->v[1]; struct vertex_header *v2 = header->v[2]; - if (header->edgeflags & 0x1) point( stage, v0 ); - if (header->edgeflags & 0x2) point( stage, v1 ); - if (header->edgeflags & 0x4) point( stage, v2 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 ); } @@ -95,22 +96,22 @@ static void lines( struct draw_stage *stage, struct vertex_header *v1 = header->v[1]; struct vertex_header *v2 = header->v[2]; -#if 0 - assert(((header->edgeflags & 0x1) >> 0) == header->v[0]->edgeflag); - assert(((header->edgeflags & 0x2) >> 1) == header->v[1]->edgeflag); - assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag); -#endif + if (header->flags & DRAW_PIPE_RESET_STIPPLE) + stage->next->reset_stipple_counter( stage->next ); - if (header->edgeflags & 0x4) line( stage, v2, v0 ); - if (header->edgeflags & 0x1) line( stage, v0, v1 ); - if (header->edgeflags & 0x2) line( stage, v1, v2 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 ); } /* Unfilled tri: * * Note edgeflags in the vertex struct is not sufficient as we will - * need to manipulate them when decomposing primitives??? + * need to manipulate them when decomposing primitives. + * + * We currently keep the vertex edgeflag and primitive edgeflag mask + * separate until the last possible moment. */ static void unfilled_tri( struct draw_stage *stage, struct prim_header *header ) @@ -147,19 +148,6 @@ static void unfilled_first_tri( struct draw_stage *stage, } -static void unfilled_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void unfilled_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - static void unfilled_flush( struct draw_stage *stage, unsigned flags ) @@ -189,18 +177,27 @@ static void unfilled_destroy( struct draw_stage *stage ) struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) { struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage); + if (unfilled == NULL) + goto fail; - draw_alloc_temp_verts( &unfilled->stage, 0 ); + if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) + goto fail; unfilled->stage.draw = draw; unfilled->stage.next = NULL; unfilled->stage.tmp = NULL; - unfilled->stage.point = unfilled_point; - unfilled->stage.line = unfilled_line; + unfilled->stage.point = draw_pipe_passthrough_point; + unfilled->stage.line = draw_pipe_passthrough_line; unfilled->stage.tri = unfilled_first_tri; unfilled->stage.flush = unfilled_flush; unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; unfilled->stage.destroy = unfilled_destroy; return &unfilled->stage; + + fail: + if (unfilled) + unfilled->stage.destroy( &unfilled->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c new file mode 100644 index 0000000000..04438f4dd0 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_util.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" +#include "draw/draw_private.h" +#include "draw/draw_pipe.h" + + + +void +draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + +void +draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + +void +draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + + + +/* This is only used for temporary verts. + */ +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +/** + * Allocate space for temporary post-transform vertices, such as for clipping. + */ +boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) +{ + assert(!stage->tmp); + + stage->tmp = NULL; + stage->nr_tmps = nr; + + if (nr != 0) + { + unsigned i; + ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); + + if (store == NULL) + return FALSE; + + stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); + if (stage->tmp == NULL) { + FREE(store); + return FALSE; + } + + for (i = 0; i < nr; i++) + stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); + } + + return TRUE; +} + + +void draw_free_temp_verts( struct draw_stage *stage ) +{ + if (stage->tmp) { + FREE( stage->tmp[0] ); + FREE( stage->tmp ); + stage->tmp = NULL; + } +} + + +/* Reset vertex ids. This is basically a type of flush. + * + * Called only from draw_pipe_vbuf.c + */ +void draw_reset_vertex_ids(struct draw_context *draw) +{ + struct draw_stage *stage = draw->pipeline.first; + + while (stage) { + unsigned i; + + for (i = 0; i < stage->nr_tmps; i++) + stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; + + stage = stage->next; + } + + if (draw->pipeline.verts) + { + unsigned i; + char *verts = draw->pipeline.verts; + unsigned stride = draw->pipeline.vertex_stride; + + for (i = 0; i < draw->pipeline.vertex_count; i++) { + ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; + verts += stride; + } + } +} + diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index e163e078f0..6be1d369c3 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -31,6 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "draw_private.h" +#include "draw_pipe.h" +#include "draw_context.h" static boolean points( unsigned prim ) { @@ -56,7 +58,8 @@ static boolean triangles( unsigned prim ) * pipeline stages. */ boolean -draw_need_pipeline(const struct draw_context *draw, +draw_need_pipeline(const struct draw_context *draw, + const struct pipe_rasterizer_state *rasterizer, unsigned int prim ) { /* Don't have to worry about triangles turning into lines/points @@ -66,30 +69,30 @@ draw_need_pipeline(const struct draw_context *draw, if (lines(prim)) { /* line stipple */ - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) + if (rasterizer->line_stipple_enable && draw->pipeline.line_stipple) return TRUE; /* wide lines */ - if (draw->rasterizer->line_width > draw->wide_line_threshold) + if (rasterizer->line_width > draw->pipeline.wide_line_threshold) return TRUE; /* AA lines */ - if (draw->rasterizer->line_smooth && draw->pipeline.aaline) + if (rasterizer->line_smooth && draw->pipeline.aaline) return TRUE; } if (points(prim)) { /* large points */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) + if (rasterizer->point_size > draw->pipeline.wide_point_threshold) return TRUE; /* AA points */ - if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + if (rasterizer->point_smooth && draw->pipeline.aapoint) return TRUE; /* point sprites */ - if (draw->rasterizer->point_sprite && draw->point_sprite) + if (rasterizer->point_sprite && draw->pipeline.point_sprite) return TRUE; } @@ -97,20 +100,20 @@ draw_need_pipeline(const struct draw_context *draw, if (triangles(prim)) { /* polygon stipple */ - if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple) + if (rasterizer->poly_stipple_enable && draw->pipeline.pstipple) return TRUE; /* unfilled polygons */ - if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) + if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) return TRUE; /* polygon offset */ - if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) + if (rasterizer->offset_cw || rasterizer->offset_ccw) return TRUE; /* two-side lighting */ - if (draw->rasterizer->light_twoside) + if (rasterizer->light_twoside) return TRUE; } @@ -119,7 +122,7 @@ draw_need_pipeline(const struct draw_context *draw, * * Generally this isn't a reason to require the pipeline, though. * - if (draw->rasterizer->cull_mode) + if (rasterizer->cull_mode) return TRUE; */ @@ -145,15 +148,15 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) stage->next = next; /* drawing wide lines? */ - wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold + wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold && !draw->rasterizer->line_smooth); /* drawing large points? */ - if (draw->rasterizer->point_sprite && draw->point_sprite) + if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite) wide_points = TRUE; else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) wide_points = FALSE; - else if (draw->rasterizer->point_size > draw->wide_point_threshold) + else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) wide_points = TRUE; else wide_points = FALSE; @@ -186,7 +189,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.wide_point; } - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) { + if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) { draw->pipeline.stipple->next = next; next = draw->pipeline.stipple; precalc_flat = 1; /* only needed for lines really */ @@ -238,7 +241,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) /* Clip stage */ - if (!draw->rasterizer->bypass_clipping) + if (!draw->bypass_clipping) { draw->pipeline.clip->next = next; next = draw->pipeline.clip; @@ -298,6 +301,8 @@ static void validate_destroy( struct draw_stage *stage ) struct draw_stage *draw_validate_stage( struct draw_context *draw ) { struct draw_stage *stage = CALLOC_STRUCT(draw_stage); + if (stage == NULL) + return NULL; stage->draw = draw; stage->next = NULL; diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index f83b441e93..86a7d1c730 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -40,7 +40,9 @@ #include "draw_vbuf.h" #include "draw_private.h" #include "draw_vertex.h" -#include "draw_vf.h" +#include "draw_pipe.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" /** @@ -56,7 +58,7 @@ struct vbuf_stage { /** Vertex size in bytes */ unsigned vertex_size; - struct draw_vertex_fetch *vf; + struct translate *translate; /* FIXME: we have no guarantee that 'unsigned' is 32bit */ @@ -71,8 +73,11 @@ struct vbuf_stage { unsigned max_indices; unsigned nr_indices; - /** Pipe primitive */ - unsigned prim; + /* Cache point size somewhere it's address won't change: + */ + float point_size; + + struct translate_cache *cache; }; @@ -113,68 +118,6 @@ check_space( struct vbuf_stage *vbuf, unsigned nr ) } -#if 0 -static INLINE void -dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) -{ - assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); - unsigned i, j, k; - - for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - debug_printf("EMIT_OMIT:"); - break; - case EMIT_ALL: - assert(i == 0); - assert(j == 0); - debug_printf("EMIT_ALL:\t"); - for(k = 0; k < vinfo->size*4; ++k) - debug_printf("%02x ", *data++); - break; - case EMIT_1F: - debug_printf("EMIT_1F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_1F_PSIZE: - debug_printf("EMIT_1F_PSIZE:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_2F: - debug_printf("EMIT_2F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_3F: - debug_printf("EMIT_3F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - data += sizeof(float); - break; - case EMIT_4F: - debug_printf("EMIT_4F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_4UB: - debug_printf("EMIT_4UB:\t"); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - break; - default: - assert(0); - } - debug_printf("\n"); - } - debug_printf("\n"); -} -#endif /** @@ -184,109 +127,25 @@ dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) * have a couple of slots at the beginning (1-dword header, 4-dword * clip pos) that we ignore here. We only use the vertex->data[] fields. */ -static INLINE void +static INLINE ushort emit_vertex( struct vbuf_stage *vbuf, struct vertex_header *vertex ) { -#if 0 - debug_printf("emit vertex %d to %p\n", - vbuf->nr_vertices, vbuf->vertex_ptr); -#endif - - if(vertex->vertex_id != UNDEFINED_VERTEX_ID) { - if(vertex->vertex_id < vbuf->nr_vertices) - return; - else - debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n", - vertex->vertex_id, vbuf->nr_vertices); - return; - } - - vertex->vertex_id = vbuf->nr_vertices++; - - if(!vbuf->vf) { - const struct vertex_info *vinfo = vbuf->vinfo; - uint i; - uint count = 0; /* for debug/sanity */ + if(vertex->vertex_id == UNDEFINED_VERTEX_ID) { + /* Hmm - vertices are emitted one at a time - better make sure + * set_buffer is efficient. Consider a special one-shot mode for + * translate. + */ + vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); + vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + + if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); - assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); - - for (i = 0; i < vinfo->num_attribs; i++) { - uint j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - /* no-op */ - break; - case EMIT_ALL: - /* just copy the whole vertex as-is to the vbuf */ - assert(i == 0); - assert(j == 0); - memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4); - vbuf->vertex_ptr += vinfo->size; - count += vinfo->size; - break; - case EMIT_HEADER: - memcpy(vbuf->vertex_ptr, vertex, sizeof(*vertex)); - *vbuf->vertex_ptr += sizeof(*vertex) / 4; - count += sizeof(*vertex) / 4; - break; - case EMIT_1F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - count++; - break; - case EMIT_1F_PSIZE: - *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size); - count++; - break; - case EMIT_2F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - count += 2; - break; - case EMIT_3F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); - count += 3; - break; - case EMIT_4F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][3]); - count += 4; - break; - case EMIT_4UB: - *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ), - float_to_ubyte( vertex->data[j][1] ), - float_to_ubyte( vertex->data[j][0] ), - float_to_ubyte( vertex->data[j][3] )); - count += 1; - break; - default: - assert(0); - } - } - assert(count == vinfo->size); -#if 0 - { - static float data[256]; - draw_vf_emit_vertex(vbuf->vf, vertex, data); - if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) { - debug_printf("With VF:\n"); - dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data); - debug_printf("Without VF:\n"); - dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size); - assert(0); - } - } -#endif - } - else { - draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr); - vbuf->vertex_ptr += vbuf->vertex_size/4; + vertex->vertex_id = vbuf->nr_vertices++; } + + return vertex->vertex_id; } @@ -300,9 +159,7 @@ vbuf_tri( struct draw_stage *stage, check_space( vbuf, 3 ); for (i = 0; i < 3; i++) { - emit_vertex( vbuf, prim->v[i] ); - - vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id; + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); } } @@ -317,9 +174,7 @@ vbuf_line( struct draw_stage *stage, check_space( vbuf, 2 ); for (i = 0; i < 2; i++) { - emit_vertex( vbuf, prim->v[i] ); - - vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id; + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); } } @@ -332,43 +187,110 @@ vbuf_point( struct draw_stage *stage, check_space( vbuf, 1 ); - emit_vertex( vbuf, prim->v[0] ); - - vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[0]->vertex_id; + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] ); } + + /** * Set the prim type for subsequent vertices. * This may result in a new vertex size. The existing vbuffer (if any) * will be flushed if needed and a new one allocated. */ static void -vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim ) +vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) { - const struct vertex_info *vinfo; - unsigned vertex_size; - - assert(newprim == PIPE_PRIM_POINTS || - newprim == PIPE_PRIM_LINES || - newprim == PIPE_PRIM_TRIANGLES); + struct translate_key hw_key; + unsigned dst_offset; + unsigned i; - vbuf->prim = newprim; - vbuf->render->set_primitive(vbuf->render, newprim); + vbuf->render->set_primitive(vbuf->render, prim); - vinfo = vbuf->render->get_vertex_info(vbuf->render); - vertex_size = vinfo->size * sizeof(float); + /* Must do this after set_primitive() above: + * + * XXX: need some state managment to track when this needs to be + * recalculated. The driver should tell us whether there was a + * state change. + */ + vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render); - if (vertex_size != vbuf->vertex_size) + if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) { vbuf_flush_vertices(vbuf); + vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); + } - vbuf->vinfo = vinfo; - vbuf->vertex_size = vertex_size; - if(vbuf->vf) - draw_vf_set_vertex_info(vbuf->vf, - vbuf->vinfo, - vbuf->stage.draw->rasterizer->point_size); - + /* Translate from pipeline vertices to hw vertices. + */ + dst_offset = 0; + + for (i = 0; i < vbuf->vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) ); + + switch (vbuf->vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + break; + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + hw_key.nr_elements = vbuf->vinfo->num_attribs; + hw_key.output_stride = vbuf->vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!vbuf->translate || + translate_key_compare(&vbuf->translate->key, &hw_key) != 0) + { + translate_key_sanitize(&hw_key); + vbuf->translate = translate_cache_find(vbuf->cache, &hw_key); + + vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0); + } + + vbuf->point_size = vbuf->stage.draw->rasterizer->point_size; + + /* Allocate new buffer? + */ if (!vbuf->vertices) vbuf_alloc_vertices(vbuf); } @@ -422,29 +344,9 @@ vbuf_flush_indices( struct vbuf_stage *vbuf ) assert((uint) (vbuf->vertex_ptr - vbuf->vertices) == vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned)); - switch(vbuf->prim) { - case PIPE_PRIM_POINTS: - break; - case PIPE_PRIM_LINES: - assert(vbuf->nr_indices % 2 == 0); - break; - case PIPE_PRIM_TRIANGLES: - assert(vbuf->nr_indices % 3 == 0); - break; - default: - assert(0); - } - vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices); vbuf->nr_indices = 0; - - /* don't need to reset point/line/tri functions */ -#if 0 - stage->point = vbuf_first_point; - stage->line = vbuf_first_line; - stage->tri = vbuf_first_tri; -#endif } @@ -486,8 +388,8 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) /* Allocate a new vertex buffer */ vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, - (ushort) vbuf->vertex_size, - (ushort) vbuf->max_vertices); + (ushort) vbuf->vertex_size, + (ushort) vbuf->max_vertices); vbuf->vertex_ptr = vbuf->vertices; } @@ -525,12 +427,15 @@ static void vbuf_destroy( struct draw_stage *stage ) if(vbuf->indices) align_free( vbuf->indices ); - if(vbuf->vf) - draw_vf_destroy( vbuf->vf ); + if(vbuf->translate) + vbuf->translate->release( vbuf->translate ); if (vbuf->render) vbuf->render->destroy( vbuf->render ); + if (vbuf->cache) + translate_cache_destroy(vbuf->cache); + FREE( stage ); } @@ -542,9 +447,8 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, struct vbuf_render *render ) { struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage); - - if(!vbuf) - return NULL; + if (vbuf == NULL) + goto fail; vbuf->stage.draw = draw; vbuf->stage.point = vbuf_first_point; @@ -555,21 +459,27 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, vbuf->stage.destroy = vbuf_destroy; vbuf->render = render; + vbuf->max_indices = MAX2(render->max_indices, UNDEFINED_VERTEX_ID-1); - assert(render->max_indices < UNDEFINED_VERTEX_ID); - vbuf->max_indices = render->max_indices; - vbuf->indices = (ushort *) - align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 ); - if(!vbuf->indices) - vbuf_destroy(&vbuf->stage); + vbuf->indices = (ushort *) align_malloc( vbuf->max_indices * + sizeof(vbuf->indices[0]), + 16 ); + if (!vbuf->indices) + goto fail; + + vbuf->cache = translate_cache_create(); + if (!vbuf->cache) + goto fail; + vbuf->vertices = NULL; vbuf->vertex_ptr = vbuf->vertices; - - vbuf->prim = ~0; - - if(!GETENV("GALLIUM_NOVF")) - vbuf->vf = draw_vf_create(); return &vbuf->stage; + + fail: + if (vbuf) + vbuf_destroy(&vbuf->stage); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 9a168ce8bd..878c9c7169 100644 --- a/src/gallium/auxiliary/draw/draw_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" +#include "draw_pipe.h" struct wideline_stage { @@ -48,19 +49,6 @@ static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) } -static void wideline_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - -static void wideline_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} - /** * Draw a wide line by drawing a quad (two triangles). @@ -179,9 +167,9 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) wide->stage.draw = draw; wide->stage.next = NULL; - wide->stage.point = wideline_point; + wide->stage.point = draw_pipe_passthrough_point; wide->stage.line = wideline_line; - wide->stage.tri = wideline_tri; + wide->stage.tri = draw_pipe_passthrough_tri; wide->stage.flush = wideline_flush; wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; wide->stage.destroy = wideline_destroy; diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 6fc7c9fcd7..ed08573382 100644 --- a/src/gallium/auxiliary/draw/draw_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -31,7 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_vs.h" +#include "draw_pipe.h" struct widepoint_stage { @@ -60,23 +61,6 @@ widepoint_stage( struct draw_stage *stage ) } -static void passthrough_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - -static void widepoint_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line(stage->next, header); -} - -static void widepoint_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} /** @@ -199,16 +183,16 @@ static void widepoint_first_point( struct draw_stage *stage, wide->ybias = 0.0; if (draw->rasterizer->gl_rasterization_rules) { - wide->ybias = -0.125; + wide->xbias = 0.125; } /* XXX we won't know the real size if it's computed by the vertex shader! */ - if ((draw->rasterizer->point_size > draw->wide_point_threshold) || - (draw->rasterizer->point_sprite && draw->point_sprite)) { + if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) || + (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)) { stage->point = widepoint_point; } else { - stage->point = passthrough_point; + stage->point = draw_pipe_passthrough_point; } if (draw->rasterizer->point_sprite) { @@ -265,17 +249,26 @@ static void widepoint_destroy( struct draw_stage *stage ) struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) { struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage); + if (wide == NULL) + goto fail; - draw_alloc_temp_verts( &wide->stage, 4 ); + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; wide->stage.draw = draw; wide->stage.next = NULL; wide->stage.point = widepoint_first_point; - wide->stage.line = widepoint_line; - wide->stage.tri = widepoint_tri; + wide->stage.line = draw_pipe_passthrough_line; + wide->stage.tri = draw_pipe_passthrough_tri; wide->stage.flush = widepoint_flush; wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; wide->stage.destroy = widepoint_destroy; return &wide->stage; + + fail: + if (wide) + wide->stage.destroy( &wide->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c deleted file mode 100644 index 51b6950334..0000000000 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ /dev/null @@ -1,523 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" - -#include "draw_private.h" -#include "draw_context.h" - - - -#define RP_NONE 0 -#define RP_POINT 1 -#define RP_LINE 2 -#define RP_TRI 3 - - -static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { - RP_POINT, - RP_LINE, - RP_LINE, - RP_LINE, - RP_TRI, - RP_TRI, - RP_TRI, - RP_TRI, - RP_TRI, - RP_TRI -}; - - -static void draw_prim_queue_flush( struct draw_context *draw ) -{ - unsigned i; - - if (0) - debug_printf("Flushing with %d prims, %d verts\n", - draw->pq.queue_nr, draw->vs.queue_nr); - - assert (draw->pq.queue_nr != 0); - - /* NOTE: we cannot save draw->pipeline->first in a local var because - * draw->pipeline->first is often changed by the first call to tri(), - * line(), etc. - */ - if (draw->rasterizer->line_stipple_enable) { - switch (draw->reduced_prim) { - case RP_TRI: - for (i = 0; i < draw->pq.queue_nr; i++) { - if (draw->pq.queue[i].reset_line_stipple) - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - - draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); - } - break; - case RP_LINE: - for (i = 0; i < draw->pq.queue_nr; i++) { - if (draw->pq.queue[i].reset_line_stipple) - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - - draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); - } - break; - case RP_POINT: - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); - break; - } - } - else { - switch (draw->reduced_prim) { - case RP_TRI: - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); - break; - case RP_LINE: - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); - break; - case RP_POINT: - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); - break; - } - } - - draw->pq.queue_nr = 0; - draw->vs.post_nr = 0; - draw_vertex_cache_unreference( draw ); -} - -void draw_do_flush( struct draw_context *draw, unsigned flags ) -{ - if (0) - debug_printf("Flushing with %d verts, %d prims\n", - draw->vs.queue_nr, - draw->pq.queue_nr ); - - if (draw->flushing) - return; - - draw->flushing = TRUE; - - if (flags >= DRAW_FLUSH_SHADER_QUEUE) { - if (draw->vs.queue_nr) { - (*draw->shader_queue_flush)(draw); - } - - if (flags >= DRAW_FLUSH_PRIM_QUEUE) { - if (draw->pq.queue_nr) - draw_prim_queue_flush(draw); - - if (flags >= DRAW_FLUSH_VERTEX_CACHE) { - draw_vertex_cache_invalidate(draw); - - if (flags >= DRAW_FLUSH_STATE_CHANGE) { - draw->pipeline.first->flush( draw->pipeline.first, flags ); - draw->pipeline.first = draw->pipeline.validate; - draw->reduced_prim = ~0; - } - } - } - } - - draw->flushing = FALSE; -} - - - -/* Return a pointer to a freshly queued primitive header. Ensure that - * there is room in the vertex cache for a maximum of "nr_verts" new - * vertices. Flush primitive and/or vertex queues if necessary to - * make space. - */ -static struct prim_header *get_queued_prim( struct draw_context *draw, - unsigned nr_verts ) -{ - if (!draw_vertex_cache_check_space( draw, nr_verts )) { -// debug_printf("v"); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE ); - } - else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) { -// debug_printf("p"); - draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE ); - } - - assert(draw->pq.queue_nr < PRIM_QUEUE_LENGTH); - - return &draw->pq.queue[draw->pq.queue_nr++]; -} - - - -/** - * Add a point to the primitive queue. - * \param i0 index into user's vertex arrays - */ -static void do_point( struct draw_context *draw, - unsigned i0 ) -{ - struct prim_header *prim = get_queued_prim( draw, 1 ); - - prim->reset_line_stipple = 0; - prim->edgeflags = 1; - prim->pad = 0; - prim->v[0] = draw->vcache.get_vertex( draw, i0 ); -} - - -/** - * Add a line to the primitive queue. - * \param i0 index into user's vertex arrays - * \param i1 index into user's vertex arrays - */ -static void do_line( struct draw_context *draw, - boolean reset_stipple, - unsigned i0, - unsigned i1 ) -{ - struct prim_header *prim = get_queued_prim( draw, 2 ); - - prim->reset_line_stipple = reset_stipple; - prim->edgeflags = 1; - prim->pad = 0; - prim->v[0] = draw->vcache.get_vertex( draw, i0 ); - prim->v[1] = draw->vcache.get_vertex( draw, i1 ); -} - -/** - * Add a triangle to the primitive queue. - */ -static void do_triangle( struct draw_context *draw, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - struct prim_header *prim = get_queued_prim( draw, 3 ); - -// _mesa_printf("tri %d %d %d\n", i0, i1, i2); - prim->reset_line_stipple = 1; - prim->edgeflags = ~0; - prim->pad = 0; - prim->v[0] = draw->vcache.get_vertex( draw, i0 ); - prim->v[1] = draw->vcache.get_vertex( draw, i1 ); - prim->v[2] = draw->vcache.get_vertex( draw, i2 ); -} - -static void do_ef_triangle( struct draw_context *draw, - boolean reset_stipple, - unsigned ef_mask, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - struct prim_header *prim = get_queued_prim( draw, 3 ); - struct vertex_header *v0 = draw->vcache.get_vertex( draw, i0 ); - struct vertex_header *v1 = draw->vcache.get_vertex( draw, i1 ); - struct vertex_header *v2 = draw->vcache.get_vertex( draw, i2 ); - - prim->reset_line_stipple = reset_stipple; - - prim->edgeflags = ef_mask & ((v0->edgeflag << 0) | - (v1->edgeflag << 1) | - (v2->edgeflag << 2)); - prim->pad = 0; - prim->v[0] = v0; - prim->v[1] = v1; - prim->v[2] = v2; -} - - -static void do_ef_quad( struct draw_context *draw, - unsigned v0, - unsigned v1, - unsigned v2, - unsigned v3 ) -{ - const unsigned omitEdge2 = ~(1 << 1); - const unsigned omitEdge3 = ~(1 << 2); - do_ef_triangle( draw, 1, omitEdge2, v0, v1, v3 ); - do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 ); -} - -static void do_quad( struct draw_context *draw, - unsigned v0, - unsigned v1, - unsigned v2, - unsigned v3 ) -{ - do_triangle( draw, v0, v1, v3 ); - do_triangle( draw, v1, v2, v3 ); -} - - -/** - * Main entrypoint to draw some number of points/lines/triangles - */ -static void -draw_prim( struct draw_context *draw, - unsigned prim, unsigned start, unsigned count ) -{ - unsigned i; - boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL); - boolean flatfirst = - (draw->rasterizer->flatshade & draw->rasterizer->flatshade_first) ? TRUE : FALSE; - -// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); - - switch (prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - do_point( draw, - start + i ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - do_line( draw, - TRUE, - start + i + 0, - start + i + 1); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, /* XXX: only if vb not split */ - start + i - 1, - start + i ); - } - - do_line( draw, - 0, - start + count - 1, - start + 0 ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, - start + i - 1, - start + i ); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (unfilled) { - for (i = 0; i+2 < count; i += 3) { - do_ef_triangle( draw, - 1, - ~0, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - else { - for (i = 0; i+2 < count; i += 3) { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - do_triangle( draw, - start + i + 0, - start + i + 2, - start + i + 1 ); - } - else { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - } - else { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - do_triangle( draw, - start + i + 1, - start + i + 0, - start + i + 2 ); - } - else { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + i + 1, - start + i + 2, - start + 0 ); - } - } - else { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + 0, - start + i + 1, - start + i + 2 ); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - if (unfilled) { - for (i = 0; i+3 < count; i += 4) { - do_ef_quad( draw, - start + i + 0, - start + i + 1, - start + i + 2, - start + i + 3); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - do_quad( draw, - start + i + 0, - start + i + 1, - start + i + 2, - start + i + 3); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - if (unfilled) { - for (i = 0; i+3 < count; i += 2) { - do_ef_quad( draw, - start + i + 2, - start + i + 0, - start + i + 1, - start + i + 3); - } - } - else { - for (i = 0; i+3 < count; i += 2) { - do_quad( draw, - start + i + 2, - start + i + 0, - start + i + 1, - start + i + 3); - } - } - break; - - case PIPE_PRIM_POLYGON: - if (unfilled) { - unsigned ef_mask = (1<<2) | (1<<0); - - for (i = 0; i+2 < count; i++) { - - if (i + 3 >= count) - ef_mask |= (1<<1); - - do_ef_triangle( draw, - i == 0, - ef_mask, - start + i + 1, - start + i + 2, - start + 0); - - ef_mask &= ~(1<<2); - } - } - else { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + i + 1, - start + i + 2, - start + 0); - } - } - break; - - default: - assert(0); - break; - } -} - - - - -/** - * Draw vertex arrays - * This is the main entrypoint into the drawing module. - * \param prim one of PIPE_PRIM_x - * \param start index of first vertex to draw - * \param count number of vertices to draw - */ -void -draw_arrays(struct draw_context *draw, unsigned prim, - unsigned start, unsigned count) -{ - if (reduced_prim[prim] != draw->reduced_prim) { - draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->reduced_prim = reduced_prim[prim]; - } - - /* drawing done here: */ - if (!draw_pt_arrays(draw, prim, start, count)) { - /* we have to run the whole pipeline */ - draw_prim(draw, prim, start, count); - } -} - - diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4d056f6dba..cee58bbf73 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -44,7 +44,6 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_exec.h" #include "tgsi/util/tgsi_scan.h" @@ -52,9 +51,11 @@ struct pipe_context; struct gallivm_prog; struct gallivm_cpu_engine; +struct draw_vertex_shader; +struct draw_context; +struct draw_stage; +struct vbuf_render; -struct draw_pt_middle_end; -struct draw_pt_front_end; /** * Basic vertex info. @@ -68,115 +69,14 @@ struct vertex_header { float clip[4]; - float data[][4]; /* Note variable size */ + /* This will probably become float (*data)[4] soon: + */ + float data[][4]; }; /* NOTE: It should match vertex_id size above */ #define UNDEFINED_VERTEX_ID 0xffff -/* XXX This is too large */ -#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) - - - -/** - * Basic info for a point/line/triangle primitive. - */ -struct prim_header { - float det; /**< front/back face determinant */ - unsigned reset_line_stipple:1; - unsigned edgeflags:3; - unsigned pad:28; - struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */ -}; - - - -struct draw_context; - -/** - * Base class for all primitive drawing stages. - */ -struct draw_stage -{ - struct draw_context *draw; /**< parent context */ - - struct draw_stage *next; /**< next stage in pipeline */ - - struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ - unsigned nr_tmps; - - void (*point)( struct draw_stage *, - struct prim_header * ); - - void (*line)( struct draw_stage *, - struct prim_header * ); - - void (*tri)( struct draw_stage *, - struct prim_header * ); - - void (*flush)( struct draw_stage *, - unsigned flags ); - - void (*reset_stipple_counter)( struct draw_stage * ); - - void (*destroy)( struct draw_stage * ); -}; - - -#define PRIM_QUEUE_LENGTH 32 -#define VCACHE_SIZE 32 -#define VCACHE_OVERFLOW 4 -#define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */ - -/** - * Private version of the compiled vertex_shader - */ -struct draw_vertex_shader { - - /* This member will disappear shortly: - */ - struct pipe_shader_state state; - - struct tgsi_shader_info info; - - void (*prepare)( struct draw_vertex_shader *shader, - struct draw_context *draw ); - - /* Run the shader - this interface will get cleaned up in the - * future: - */ - void (*run)( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - struct vertex_header *vOut[] ); - - - void (*delete)( struct draw_vertex_shader * ); -}; - - -/* Internal function for vertex fetch. - */ -typedef void (*fetch_func)(const void *ptr, float *attrib); - -fetch_func draw_get_fetch_func( enum pipe_format format ); - - - -typedef void (*full_fetch_func)( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ); - -typedef void (*pt_fetch_func)( struct draw_context *draw, - float *out, - unsigned start, - unsigned count ); - - -struct vbuf_render; /** * Private context for the drawing module. @@ -203,6 +103,17 @@ struct draw_context struct draw_stage *wide_line; struct draw_stage *wide_point; struct draw_stage *rasterize; + + float wide_point_threshold; /**< convert pnts to tris if larger than this */ + float wide_line_threshold; /**< convert lines to tris if wider than this */ + boolean line_stipple; /**< do line stipple? */ + boolean point_sprite; /**< convert points to quads for sprites? */ + + /* Temporary storage while the pipeline is being run: + */ + char *verts; + unsigned vertex_stride; + unsigned vertex_count; } pipeline; @@ -211,70 +122,64 @@ struct draw_context /* Support prototype passthrough path: */ struct { - unsigned prim; /* XXX: to be removed */ - unsigned hw_vertex_size; /* XXX: to be removed */ - struct { struct draw_pt_middle_end *fetch_emit; - struct draw_pt_middle_end *fetch_pipeline; - struct draw_pt_middle_end *fetch_shade_emit; - struct draw_pt_middle_end *fetch_shade_cliptest_pipeline_or_emit; + struct draw_pt_middle_end *general; } middle; struct { - struct draw_pt_front_end *noop; - struct draw_pt_front_end *split_arrays; struct draw_pt_front_end *vcache; + struct draw_pt_front_end *varray; } front; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + + /* user-space vertex data, buffers */ struct { - char *verts; - unsigned vertex_stride; - unsigned vertex_count; - } pipeline; + const unsigned *edgeflag; + + /** vertex element/index buffer (ex: glDrawElements) */ + const void *elts; + /** bytes per index (0, 1, 2 or 4) */ + unsigned eltSize; + + /** vertex arrays */ + const void *vbuffer[PIPE_MAX_ATTRIBS]; + + /** constant buffer (for vertex shader) */ + const void *constants; + } user; } pt; - boolean flushing; + struct { + boolean bypass_clipping; + } driver; + + boolean flushing; /**< debugging/sanity */ + boolean suspend_flushing; /**< internally set */ + boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */ /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + struct draw_vertex_shader *vertex_shader; boolean identity_viewport; uint num_vs_outputs; /**< convenience, from vertex_shader */ - /* user-space vertex data, buffers */ - struct { - const unsigned *edgeflag; - - /** vertex element/index buffer (ex: glDrawElements) */ - const void *elts; - /** bytes per index (0, 1, 2 or 4) */ - unsigned eltSize; - - /** vertex arrays */ - const void *vbuffer[PIPE_MAX_ATTRIBS]; - - /** constant buffer (for vertex shader) */ - const void *constants; - } user; /* Clip derived state: */ float plane[12][4]; unsigned nr_planes; - float wide_point_threshold; /**< convert pnts to tris if larger than this */ - float wide_line_threshold; /**< convert lines to tris if wider than this */ - boolean line_stipple; /**< do line stipple? */ - boolean point_sprite; /**< convert points to quads for sprites? */ - boolean use_sse; - /* If a prim stage introduces new vertex attributes, they'll be stored here */ struct { @@ -288,61 +193,6 @@ struct draw_context /** TGSI program interpreter runtime state */ struct tgsi_exec_machine machine; - /* Vertex fetch internal state - */ - struct { - const ubyte *src_ptr[PIPE_MAX_ATTRIBS]; - unsigned pitch[PIPE_MAX_ATTRIBS]; - fetch_func fetch[PIPE_MAX_ATTRIBS]; - unsigned nr_attrs; - full_fetch_func fetch_func; - pt_fetch_func pt_fetch; - } vertex_fetch; - - /* Post-tnl vertex cache: - */ - struct { - unsigned referenced; /**< bitfield */ - - struct { - unsigned in; /* client array element */ - unsigned out; /* index in vs queue/array */ - } idx[VCACHE_SIZE + VCACHE_OVERFLOW]; - - unsigned overflow; - - /** To find space in the vertex cache: */ - struct vertex_header *(*get_vertex)( struct draw_context *draw, - unsigned i ); - } vcache; - - /* Vertex shader queue: - */ - struct { - struct { - unsigned elt; /**< index into the user's vertex arrays */ - struct vertex_header *vertex; - } queue[VS_QUEUE_LENGTH]; - unsigned queue_nr; - unsigned post_nr; - } vs; - - /** - * Run the vertex shader on all vertices in the vertex queue. - */ - void (*shader_queue_flush)(struct draw_context *draw); - - /* Prim pipeline queue: - */ - struct { - /* Need to queue up primitives until their vertices have been - * transformed by a vs queue flush. - */ - struct prim_header queue[PRIM_QUEUE_LENGTH]; - unsigned queue_nr; - } pq; - - /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. */ struct gallivm_cpu_engine *engine; @@ -351,101 +201,70 @@ struct draw_context -extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); -extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); -extern struct draw_stage *draw_offset_stage( struct draw_context *context ); -extern struct draw_stage *draw_clip_stage( struct draw_context *context ); -extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); -extern struct draw_stage *draw_cull_stage( struct draw_context *context ); -extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); -extern struct draw_stage *draw_validate_stage( struct draw_context *context ); -extern void draw_free_temp_verts( struct draw_stage *stage ); -extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); -extern void draw_reset_vertex_ids( struct draw_context *draw ); +/******************************************************************************* + * Vertex processing (was passthrough) code: + */ +boolean draw_pt_init( struct draw_context *draw ); +void draw_pt_destroy( struct draw_context *draw ); +void draw_pt_reset_vertex_ids( struct draw_context *draw ); -extern int draw_vertex_cache_check_space( struct draw_context *draw, - unsigned nr_verts ); +/******************************************************************************* + * Primitive processing (pipeline) code: + */ -extern void draw_vertex_cache_invalidate( struct draw_context *draw ); -extern void draw_vertex_cache_unreference( struct draw_context *draw ); -extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ); +boolean draw_pipeline_init( struct draw_context *draw ); +void draw_pipeline_destroy( struct draw_context *draw ); -extern void draw_vertex_shader_queue_flush( struct draw_context *draw ); -extern void draw_update_vertex_fetch( struct draw_context *draw ); -extern boolean draw_need_pipeline(const struct draw_context *draw, - unsigned prim ); -/* Passthrough mode (second attempt): +/* We use the top few bits in the elts[] parameter to convey a little + * API information. This limits the number of vertices we can address + * to only 4096 -- if that becomes a problem, we can switch to 32-bit + * draw indices. + * + * These flags expected at first vertex of lines & triangles when + * unfilled and/or line stipple modes are operational. */ -boolean draw_pt_init( struct draw_context *draw ); -void draw_pt_destroy( struct draw_context *draw ); -boolean draw_pt_arrays( struct draw_context *draw, +#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12) +#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12) +#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12) +#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12) +#define DRAW_PIPE_RESET_STIPPLE (0x8<<12) +#define DRAW_PIPE_FLAG_MASK (0xf<<12) + +void draw_pipeline_run( struct draw_context *draw, unsigned prim, - unsigned start, + struct vertex_header *vertices, + unsigned vertex_count, + unsigned stride, + const ushort *elts, unsigned count ); -void draw_pt_reset_vertex_ids( struct draw_context *draw ); -void draw_pt_run_pipeline( struct draw_context *draw, - unsigned prim, - char *verts, - unsigned vertex_stride, - unsigned vertex_count, - const ushort *elts, - unsigned count ); - - -#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ -#define DRAW_FLUSH_PRIM_QUEUE 0x2 -#define DRAW_FLUSH_VERTEX_CACHE 0x4 + + +void draw_pipeline_flush( struct draw_context *draw, + unsigned flags ); + + + +/******************************************************************************* + * Flushing + */ + #define DRAW_FLUSH_STATE_CHANGE 0x8 #define DRAW_FLUSH_BACKEND 0x10 void draw_do_flush( struct draw_context *draw, unsigned flags ); -boolean draw_get_edgeflag( struct draw_context *draw, - unsigned idx ); - -/** - * Get a writeable copy of a vertex. - * \param stage drawing stage info - * \param vert the vertex to copy (source) - * \param idx index into stage's tmp[] array to put the copy (dest) - * \return pointer to the copied vertex - */ -static INLINE struct vertex_header * -dup_vert( struct draw_stage *stage, - const struct vertex_header *vert, - unsigned idx ) -{ - struct vertex_header *tmp = stage->tmp[idx]; - const uint vsize = sizeof(struct vertex_header) - + stage->draw->num_vs_outputs * 4 * sizeof(float); - memcpy(tmp, vert, vsize); - tmp->vertex_id = UNDEFINED_VERTEX_ID; - return tmp; -} - -static INLINE float -dot4(const float *a, const float *b) -{ - float result = (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); - return result; -} #endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f59fb86f78..c9c5d18313 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -36,153 +36,63 @@ #include "draw/draw_pt.h" -/* XXX: Shouldn't those two functions below use the '>' operator??? - */ - -static boolean too_many_verts( struct draw_context *draw, - unsigned verts ) -{ - return verts < 1024; -} - -static boolean too_many_elts( struct draw_context *draw, - unsigned elts ) -{ - return elts < (16 * 1024); -} -boolean +/* Overall we split things into: + * - frontend -- prepare fetch_elts, draw_elts - eg vcache + * - middle -- fetch, shade, cliptest, viewport + * - pipeline -- the prim pipeline: clipping, wide lines, etc + * - backend -- the vbuf_render provided by the driver. + */ +static boolean draw_pt_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - const boolean pipeline = draw_need_pipeline(draw, prim); - const boolean cliptest = !draw->rasterizer->bypass_clipping; - const boolean shading = !draw->rasterizer->bypass_vs; struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; + unsigned opt = 0; - - /* Overall we do: - * - frontend -- prepare fetch_elts, draw_elts - eg vcache - * - middle -- fetch, shade, cliptest, viewport - * - pipeline -- the prim pipeline: clipping, wide lines, etc - * - backend -- the vbuf_render provided by the driver. - */ - - - if (!cliptest && !pipeline && !shading) { - /* This is the 'passthrough' path: - */ - /* Fetch user verts, emit hw verts: - */ - middle = draw->pt.middle.fetch_emit; - } - else if (!cliptest && !shading) { - /* This is the 'passthrough' path targetting the pipeline backend. - */ - /* Fetch user verts, emit pipeline verts, run pipeline: - */ - middle = draw->pt.middle.fetch_pipeline; + if (!draw->render) { + opt |= PT_PIPELINE; } -#if 0 - else if (!cliptest && !pipeline) { - /* Fetch user verts, run vertex shader, emit hw verts: - */ - middle = draw->pt.middle.fetch_shade_emit; - } - else if (!pipeline) { - /* Even though !pipeline, we have to run it to get clipping. We - * do know that the pipeline is just the clipping operation, but - * that probably doesn't help much. - * - * This is going to be the most important path for a lot of - * swtnl cards. - */ - /* Fetch user verts, - * run vertex shader, - * cliptest and viewport trasform - * if no clipped vertices, - * emit hw verts - * else - * run pipline - */ - middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; - } - else if (!cliptest) { - /* Fetch user verts, run vertex shader, run pipeline: - */ - middle = draw->pt.middle.fetch_shade_pipeline; + + if (draw_need_pipeline(draw, + draw->rasterizer, + prim)) { + opt |= PT_PIPELINE; } - else { - /* This is what we're currently always doing: - */ - /* Fetch user verts, run vertex shader, cliptest, run pipeline: - */ - middle = draw->pt.middle.fetch_shade_cliptest_pipeline; + + if (!draw->bypass_clipping) { + opt |= PT_CLIPTEST; } -#else - else { - return FALSE; + + if (!draw->rasterizer->bypass_vs) { + opt |= PT_SHADE; } -#endif + if (opt) + middle = draw->pt.middle.general; + else + middle = draw->pt.middle.fetch_emit; - /* If !pipeline, need to make sure we respect the driver's limited - * capabilites to receive blocks of vertex data and elements. - */ -#if 0 - if (!pipeline) { - unsigned vertex_mode = passthrough; - unsigned nr_verts = count_vertices( draw, start, count ); - unsigned hw_prim = prim; - - if (is_elts(draw)) { - frontend = draw->pt.front.vcache; - hw_prim = reduced_prim(prim); - } - - if (too_many_verts(nr_verts)) { - /* if (is_verts(draw) && can_split(prim)) { - draw = draw_arrays_split; - } - else */ { - frontend = draw->pt.front.vcache; - hw_prim = reduced_prim(prim); - } - } - - if (too_many_elts(count)) { - - /* if (is_elts(draw) && can_split(prim)) { - draw = draw_elts_split; - } - else */ { - frontend = draw->pt.front.vcache; - hw_prim = reduced_prim(prim); - } - } - - if (!good_prim(hw_prim)) { - draw = draw->pt.front.vcache; - } - } -#else - frontend = draw->pt.front.vcache; -#endif - /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + /* Pick the right frontend */ - draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (draw->pt.user.elts || + count >= 256) { + frontend = draw->pt.front.vcache; + } else { + frontend = draw->pt.front.varray; + } - frontend->prepare( frontend, prim, middle ); + frontend->prepare( frontend, prim, middle, opt ); - frontend->run( frontend, - draw_pt_elt_func( draw ), - draw_pt_elt_ptr( draw, start ), - count ); + frontend->run(frontend, + draw_pt_elt_func(draw), + draw_pt_elt_ptr(draw, start), + count); frontend->finish( frontend ); @@ -192,16 +102,20 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { - draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); - if (!draw->pt.middle.fetch_emit) + draw->pt.front.vcache = draw_pt_vcache( draw ); + if (!draw->pt.front.vcache) return FALSE; - draw->pt.middle.fetch_pipeline = draw_pt_fetch_pipeline( draw ); - if (!draw->pt.middle.fetch_pipeline) + draw->pt.front.varray = draw_pt_varray(draw); + if (!draw->pt.front.varray) return FALSE; - draw->pt.front.vcache = draw_pt_vcache( draw ); - if (!draw->pt.front.vcache) + draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); + if (!draw->pt.middle.fetch_emit) + return FALSE; + + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); + if (!draw->pt.middle.general) return FALSE; return TRUE; @@ -210,18 +124,68 @@ boolean draw_pt_init( struct draw_context *draw ) void draw_pt_destroy( struct draw_context *draw ) { + if (draw->pt.middle.general) { + draw->pt.middle.general->destroy( draw->pt.middle.general ); + draw->pt.middle.general = NULL; + } + if (draw->pt.middle.fetch_emit) { draw->pt.middle.fetch_emit->destroy( draw->pt.middle.fetch_emit ); draw->pt.middle.fetch_emit = NULL; } - if (draw->pt.middle.fetch_pipeline) { - draw->pt.middle.fetch_pipeline->destroy( draw->pt.middle.fetch_pipeline ); - draw->pt.middle.fetch_pipeline = NULL; - } - if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; } + + if (draw->pt.front.varray) { + draw->pt.front.varray->destroy( draw->pt.front.varray ); + draw->pt.front.varray = NULL; + } +} + + + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +/** + * Draw vertex arrays + * This is the main entrypoint into the drawing module. + * \param prim one of PIPE_PRIM_x + * \param start index of first vertex to draw + * \param count number of vertices to draw + */ +void +draw_arrays(struct draw_context *draw, unsigned prim, + unsigned start, unsigned count) +{ + if (reduced_prim[prim] != draw->reduced_prim) { + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->reduced_prim = reduced_prim[prim]; + } + + /* drawing done here: */ + draw_pt_arrays(draw, prim, start, count); +} + +boolean draw_pt_get_edgeflag( struct draw_context *draw, + unsigned idx ) +{ + if (draw->pt.user.edgeflag) + return (draw->pt.user.edgeflag[idx/32] & (1 << (idx%32))) != 0; + else + return 1; } diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 590823fd33..2dec376cee 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -40,14 +40,11 @@ typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); struct draw_pt_middle_end; struct draw_context; -/* We use the top couple of bits in the vertex fetch index to convey a - * little API information. This limits the number of vertices we can - * address to only 1 billion -- if that becomes a problem, these could - * be moved out & passed separately. - */ -#define DRAW_PT_EDGEFLAG (1<<30) -#define DRAW_PT_RESET_STIPPLE (1<<31) -#define DRAW_PT_FLAG_MASK (3<<30) + +#define PT_SHADE 0x1 +#define PT_CLIPTEST 0x2 +#define PT_PIPELINE 0x4 +#define PT_MAX_MIDDLE 0x8 /* The "front end" - prepare sets of fetch, draw elements for the @@ -64,7 +61,8 @@ struct draw_context; struct draw_pt_front_end { void (*prepare)( struct draw_pt_front_end *, unsigned prim, - struct draw_pt_middle_end * ); + struct draw_pt_middle_end *, + unsigned opt ); void (*run)( struct draw_pt_front_end *, pt_elt_func elt_func, @@ -82,15 +80,11 @@ struct draw_pt_front_end { * Currently two versions of this: * - fetch, vertex shade, cliptest, prim-pipeline * - fetch, emit (ie passthrough) - * Later: - * - fetch, vertex shade, cliptest, maybe-pipeline, maybe-emit - * - fetch, vertex shade, emit - * - * Currenly only using the passthrough version. */ struct draw_pt_middle_end { void (*prepare)( struct draw_pt_middle_end *, - unsigned prim ); + unsigned prim, + unsigned opt ); void (*run)( struct draw_pt_middle_end *, const unsigned *fetch_elts, @@ -104,12 +98,9 @@ struct draw_pt_middle_end { /* The "back end" - supplied by the driver, defined in draw_vbuf.h. - * - * Not sure whether to wrap the prim pipeline up as an alternate - * backend. Would be a win for everything except pure passthrough - * mode... */ struct vbuf_render; +struct vertex_header; /* Helper functions. @@ -118,12 +109,89 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw ); const void *draw_pt_elt_ptr( struct draw_context *draw, unsigned start ); -/* Implementations: +/* Frontends: + * + * Currently only the general-purpose vcache implementation, could add + * a special case for tiny vertex buffers. */ struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); +struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); + +/* Middle-ends: + * + * Currently one general-purpose case which can do all possibilities, + * at the slight expense of creating a vertex_header in some cases + * unecessarily. + * + * The special case fetch_emit code avoids pipeline vertices + * altogether and builds hardware vertices directly from API + * vertex_elements. + */ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); -struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ); +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); + + +/* More helpers: + */ +boolean draw_pt_get_edgeflag( struct draw_context *draw, + unsigned idx ); + + +/******************************************************************************* + * HW vertex emit: + */ +struct pt_emit; + +void draw_pt_emit_prepare( struct pt_emit *emit, + unsigned prim ); + +void draw_pt_emit( struct pt_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ); + +void draw_pt_emit_destroy( struct pt_emit *emit ); + +struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); + + +/******************************************************************************* + * API vertex fetch: + */ + +struct pt_fetch; +void draw_pt_fetch_prepare( struct pt_fetch *fetch, + unsigned vertex_size ); + +void draw_pt_fetch_run( struct pt_fetch *fetch, + const unsigned *elts, + unsigned count, + char *verts ); + +void draw_pt_fetch_destroy( struct pt_fetch *fetch ); + +struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ); + +/******************************************************************************* + * Post-VS: cliptest, rhw, viewport + */ +struct pt_post_vs; + +boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, + struct vertex_header *pipeline_verts, + unsigned stride, + unsigned count ); + +void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, + boolean bypass_clipping, + boolean identity_viewport, + boolean opengl ); + +struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ); +void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ); #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c index d49770e7b2..2094c081ed 100644 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -59,7 +59,7 @@ static unsigned elt_vert( const void *elts, unsigned idx ) pt_elt_func draw_pt_elt_func( struct draw_context *draw ) { - switch (draw->user.eltSize) { + switch (draw->pt.user.eltSize) { case 0: return elt_vert; case 1: return elt_ubyte; case 2: return elt_ushort; @@ -71,9 +71,9 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw ) const void *draw_pt_elt_ptr( struct draw_context *draw, unsigned start ) { - const char *elts = draw->user.elts; + const char *elts = draw->pt.user.elts; - switch (draw->user.eltSize) { + switch (draw->pt.user.eltSize) { case 0: return (const void *)(((const ubyte *)NULL) + start); case 1: diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c new file mode 100644 index 0000000000..ce3a153f64 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -0,0 +1,203 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + +struct pt_emit { + struct draw_context *draw; + + struct translate *translate; + + struct translate_cache *cache; +}; + +void draw_pt_emit_prepare( struct pt_emit *emit, + unsigned prim ) +{ + struct draw_context *draw = emit->draw; + const struct vertex_info *vinfo; + unsigned dst_offset; + struct translate_key hw_key; + unsigned i; + boolean ok; + + ok = draw->render->set_primitive(draw->render, prim); + if (!ok) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + + /* Translate from pipeline vertices to hw vertices. + */ + dst_offset = 0; + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) ); + + + + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + hw_key.nr_elements = vinfo->num_attribs; + hw_key.output_stride = vinfo->size * 4; + + if (!emit->translate || + translate_key_compare(&emit->translate->key, &hw_key) != 0) + { + translate_key_sanitize(&hw_key); + emit->translate = translate_cache_find(emit->cache, &hw_key); + } +} + + +void draw_pt_emit( struct pt_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ) +{ + struct draw_context *draw = emit->draw; + struct translate *translate = emit->translate; + struct vbuf_render *render = draw->render; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)vertex_count); + if (!hw_verts) { + assert(0); + return; + } + + translate->set_buffer(translate, + 0, + vertex_data, + stride ); + + translate->set_buffer(translate, + 1, + &draw->rasterizer->point_size, + 0); + + translate->run( translate, + 0, + vertex_count, + hw_verts ); + + render->draw(render, + elts, + count); + + render->release_vertices(render, + hw_verts, + translate->key.output_stride, + vertex_count); +} + + +struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) +{ + struct pt_emit *emit = CALLOC_STRUCT(pt_emit); + if (!emit) + return NULL; + + emit->draw = draw; + emit->cache = translate_cache_create(); + if (!emit->cache) { + FREE(emit); + return NULL; + } + + return emit; +} + +void draw_pt_emit_destroy( struct pt_emit *emit ) +{ + if (emit->cache) + translate_cache_destroy(emit->cache); + + FREE(emit); +} diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c new file mode 100644 index 0000000000..100117a9ae --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -0,0 +1,191 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + + +struct pt_fetch { + struct draw_context *draw; + + struct translate *translate; + + unsigned vertex_size; + boolean need_edgeflags; + + struct translate_cache *cache; +}; + +/* Perform the fetch from API vertex elements & vertex buffers, to a + * contiguous set of float[4] attributes as required for the + * vertex_shader->run_linear() method. + * + * This is used in all cases except pure passthrough + * (draw_pt_fetch_emit.c) which has its own version to translate + * directly to hw vertices. + * + */ +void draw_pt_fetch_prepare( struct pt_fetch *fetch, + unsigned vertex_size ) +{ + struct draw_context *draw = fetch->draw; + unsigned i, nr = 0; + unsigned dst_offset = 0; + struct translate_key key; + + fetch->vertex_size = vertex_size; + + /* Always emit/leave space for a vertex header. + * + * It's worth considering whether the vertex headers should contain + * a pointer to the 'data', rather than having it inline. + * Something to look at after we've fully switched over to the pt + * paths. + */ + { + /* Need to set header->vertex_id = 0xffff somehow. + */ + key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; + key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; + key.element[nr].input_offset = 0; + key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; + key.element[nr].output_offset = dst_offset; + dst_offset += 1 * sizeof(float); + nr++; + + + /* Just leave the clip[] array untouched. + */ + dst_offset += 4 * sizeof(float); + } + + + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + key.element[nr].input_format = draw->pt.vertex_element[i].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; + key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[nr].output_offset = dst_offset; + + dst_offset += 4 * sizeof(float); + nr++; + } + + assert(dst_offset <= vertex_size); + + key.nr_elements = nr; + key.output_stride = vertex_size; + + + if (!fetch->translate || + translate_key_compare(&fetch->translate->key, &key) != 0) + { + translate_key_sanitize(&key); + fetch->translate = translate_cache_find(fetch->cache, &key); + + { + static struct vertex_header vh = { 0, 1, 0, 0xffff }; + fetch->translate->set_buffer(fetch->translate, + draw->pt.nr_vertex_buffers, + &vh, + 0); + } + } + + fetch->need_edgeflags = ((draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) && + draw->pt.user.edgeflag); +} + + + + +void draw_pt_fetch_run( struct pt_fetch *fetch, + const unsigned *elts, + unsigned count, + char *verts ) +{ + struct draw_context *draw = fetch->draw; + struct translate *translate = fetch->translate; + unsigned i; + + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + translate->set_buffer(translate, + i, + ((char *)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } + + translate->run_elts( translate, + elts, + count, + verts ); + + /* Edgeflags are hard to fit into a translate program, populate + * them separately if required. In the setup above they are + * defaulted to one, so only need this if there is reason to change + * that default: + */ + if (fetch->need_edgeflags) { + for (i = 0; i < count; i++) { + struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size); + vh->edgeflag = draw_pt_get_edgeflag( draw, elts[i] ); + } + } +} + + +struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) +{ + struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch); + if (!fetch) + return NULL; + + fetch->draw = draw; + fetch->cache = translate_cache_create(); + if (!fetch->cache) { + FREE(fetch); + return NULL; + } + + return fetch; +} + +void draw_pt_fetch_destroy( struct pt_fetch *fetch ) +{ + translate_cache_destroy(fetch->cache); + + FREE(fetch); +} + diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 0806076956..b7b970a297 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -36,6 +36,8 @@ #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" /* The simplest 'middle end' in the new vertex code. * @@ -72,107 +74,30 @@ struct fetch_emit_middle_end { struct draw_pt_middle_end base; struct draw_context *draw; - - struct { - const ubyte *ptr; - unsigned pitch; - void (*fetch)( const void *from, float *attrib); - void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_MAX_ATTRIBS]; - unsigned nr_fetch; - unsigned hw_vertex_size; -}; - - - -static void fetch_R32_FLOAT( const void *from, - float *attrib ) -{ - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = 0.0; - attrib[2] = 0.0; - attrib[3] = 1.0; -} - - -static void emit_R32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out) += 1; -} - -static void emit_R32G32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out) += 2; -} + struct translate *translate; + const struct vertex_info *vinfo; -static void emit_R32G32B32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out) += 3; -} + /* Cache point size somewhere it's address won't change: + */ + float point_size; -static void emit_R32G32B32A32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out)[3] = attrib[3]; - (*out) += 4; -} + struct translate_cache *cache; +}; -/** - * General-purpose fetch from user's vertex arrays, emit to driver's - * vertex buffer. - * - * XXX this is totally temporary. - */ -static void -fetch_store_general( struct fetch_emit_middle_end *feme, - void *out_ptr, - const unsigned *fetch_elts, - unsigned count ) -{ - float *out = (float *)out_ptr; - struct vbuf_render *render = feme->draw->render; - uint i, j; - - for (i = 0; i < count; i++) { - unsigned elt = fetch_elts[i] & ~DRAW_PT_FLAG_MASK; - - for (j = 0; j < feme->nr_fetch; j++) { - float attrib[4]; - const ubyte *from = (feme->fetch[j].ptr + - feme->fetch[j].pitch * elt); - - feme->fetch[j].fetch( from, attrib ); - feme->fetch[j].emit( attrib, &out ); - } - } -} - static void fetch_emit_prepare( struct draw_pt_middle_end *middle, - unsigned prim ) + unsigned prim, + unsigned opt ) { - static const float zero = 0; struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; const struct vertex_info *vinfo; - unsigned i; + unsigned i, dst_offset; boolean ok; + struct translate_key key; ok = draw->render->set_primitive( draw->render, @@ -184,55 +109,93 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, /* Must do this after set_primitive() above: */ - vinfo = draw->render->get_vertex_info(draw->render); + vinfo = feme->vinfo = draw->render->get_vertex_info(draw->render); + + - for (i = 0; i < vinfo->num_attribs; i++) { - unsigned src_element = vinfo->src_index[i]; - unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index; - - feme->fetch[i].ptr = ((const ubyte *)draw->user.vbuffer[src_buffer] + - draw->vertex_buffer[src_buffer].buffer_offset + - draw->vertex_element[src_element].src_offset); + /* Transform from API vertices to HW vertices, skipping the + * pipeline_vertex intermediate step. + */ + dst_offset = 0; + memset(&key, 0, sizeof(key)); - feme->fetch[i].pitch = draw->vertex_buffer[src_buffer].pitch; - - feme->fetch[i].fetch = draw_get_fetch_func(draw->vertex_element[src_element].src_format); + for (i = 0; i < vinfo->num_attribs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]]; + unsigned emit_sz = 0; + unsigned input_format = src->src_format; + unsigned input_buffer = src->vertex_buffer_index; + unsigned input_offset = src->src_offset; + unsigned output_format; switch (vinfo->emit[i]) { case EMIT_4F: - feme->fetch[i].emit = emit_R32G32B32A32_FLOAT; + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); break; case EMIT_3F: - feme->fetch[i].emit = emit_R32G32B32_FLOAT; + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); break; case EMIT_2F: - feme->fetch[i].emit = emit_R32G32_FLOAT; + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); break; case EMIT_1F: - feme->fetch[i].emit = emit_R32_FLOAT; - break; - case EMIT_HEADER: - feme->fetch[i].ptr = (const ubyte *)&zero; - feme->fetch[i].pitch = 0; - feme->fetch[i].fetch = fetch_R32_FLOAT; - feme->fetch[i].emit = emit_R32_FLOAT; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); break; case EMIT_1F_PSIZE: - feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size; - feme->fetch[i].pitch = 0; - feme->fetch[i].fetch = fetch_R32_FLOAT; - feme->fetch[i].emit = emit_R32_FLOAT; + input_format = PIPE_FORMAT_R32_FLOAT; + input_buffer = draw->pt.nr_vertex_buffers; + input_offset = 0; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); break; default: assert(0); - feme->fetch[i].emit = NULL; - break; + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + continue; } + + key.element[i].input_format = input_format; + key.element[i].input_buffer = input_buffer; + key.element[i].input_offset = input_offset; + key.element[i].output_format = output_format; + key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; } - feme->nr_fetch = vinfo->num_attribs; - feme->hw_vertex_size = vinfo->size * 4; + key.nr_elements = vinfo->num_attribs; + key.output_stride = vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!feme->translate || + translate_key_compare(&feme->translate->key, &key) != 0) + { + translate_key_sanitize(&key); + feme->translate = translate_cache_find(feme->cache, + &key); + + + feme->translate->set_buffer(feme->translate, + draw->pt.nr_vertex_buffers, + &feme->point_size, + 0); + } + + feme->point_size = draw->rasterizer->point_size; + + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + feme->translate->set_buffer(feme->translate, + i, + ((char *)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } } @@ -249,8 +212,12 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, struct draw_context *draw = feme->draw; void *hw_verts; + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)feme->hw_vertex_size, + (ushort)feme->translate->key.output_stride, (ushort)fetch_count ); if (!hw_verts) { assert(0); @@ -260,10 +227,19 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, /* Single routine to fetch vertices and emit HW verts. */ - fetch_store_general( feme, - hw_verts, - fetch_elts, - fetch_count ); + feme->translate->run_elts( feme->translate, + fetch_elts, + fetch_count, + hw_verts ); + + if (0) { + unsigned i; + for (i = 0; i < fetch_count; i++) { + debug_printf("\n\nvertex %d:\n", i); + draw_dump_emitted_vertex( feme->vinfo, + (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i ); + } + } /* XXX: Draw arrays path to avoid re-emitting index list again and * again. @@ -276,7 +252,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, */ draw->render->release_vertices( draw->render, hw_verts, - feme->hw_vertex_size, + feme->translate->key.output_stride, fetch_count ); } @@ -290,6 +266,10 @@ static void fetch_emit_finish( struct draw_pt_middle_end *middle ) static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) { + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + + translate_cache_destroy(feme->cache); + FREE(middle); } @@ -297,7 +277,15 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) { struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end ); - + if (fetch_emit == NULL) + return NULL; + + fetch_emit->cache = translate_cache_create(); + if (!fetch_emit->cache) { + FREE(fetch_emit); + return NULL; + } + fetch_emit->base.prepare = fetch_emit_prepare; fetch_emit->base.run = fetch_emit_run; fetch_emit->base.finish = fetch_emit_finish; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c deleted file mode 100644 index 4c2a281b29..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ /dev/null @@ -1,328 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pt.h" - -/* The simplest 'middle end' in the new vertex code. - * - * The responsibilities of a middle end are to: - * - perform vertex fetch using - * - draw vertex element/buffer state - * - a list of fetch indices we received as an input - * - run the vertex shader - * - cliptest, - * - clip coord calculation - * - viewport transformation - * - if necessary, run the primitive pipeline, passing it: - * - a linear array of vertex_header vertices constructed here - * - a set of draw indices we received as an input - * - otherwise, drive the hw backend, - * - allocate space for hardware format vertices - * - translate the vertex-shader output vertices to hw format - * - calling the backend draw functions. - * - * For convenience, we provide a helper function to drive the hardware - * backend given similar inputs to those required to run the pipeline. - * - * In the case of passthrough mode, many of these actions are disabled - * or noops, so we end up doing: - * - * - perform vertex fetch - * - drive the hw backend - * - * IE, basically just vertex fetch to post-vs-format vertices, - * followed by a call to the backend helper function. - */ - - -struct fetch_pipeline_middle_end { - struct draw_pt_middle_end base; - struct draw_context *draw; - - void (*header)( unsigned idx, float **out); - - struct { - const ubyte *ptr; - unsigned pitch; - void (*fetch)( const void *from, float *attrib); - void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_MAX_ATTRIBS]; - - unsigned nr_fetch; - unsigned pipeline_vertex_size; - unsigned prim; -}; - - - -static void emit_R32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out) += 1; -} - -static void emit_R32G32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out) += 2; -} - -static void emit_R32G32B32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out) += 3; -} - -static void emit_R32G32B32A32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out)[3] = attrib[3]; - (*out) += 4; -} - -static void header( unsigned idx, - float **out ) -{ - struct vertex_header *header = (struct vertex_header *) (*out); - - header->clipmask = 0; - header->edgeflag = 1; - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - - (*out)[1] = 0; - (*out)[2] = 0; - (*out)[3] = 0; - (*out)[3] = 1; - (*out) += 5; -} - - -static void header_ef( unsigned idx, - float **out ) -{ - struct vertex_header *header = (struct vertex_header *) (*out); - - /* XXX: need a reset_stipple flag in the vertex header too? - */ - header->clipmask = 0; - header->edgeflag = (idx & DRAW_PT_EDGEFLAG) != 0; - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - - (*out)[1] = 0; - (*out)[2] = 0; - (*out)[3] = 0; - (*out)[3] = 1; - (*out) += 5; -} - - -/** - * General-purpose fetch from user's vertex arrays, emit to driver's - * vertex buffer. - * - * XXX this is totally temporary. - */ -static void -fetch_store_general( struct fetch_pipeline_middle_end *fpme, - void *out_ptr, - const unsigned *fetch_elts, - unsigned count ) -{ - float *out = (float *)out_ptr; - uint i, j; - - for (i = 0; i < count; i++) { - unsigned elt = fetch_elts[i]; - - fpme->header( elt, &out ); - elt &= ~DRAW_PT_FLAG_MASK; - - for (j = 0; j < fpme->nr_fetch; j++) { - float attrib[4]; - const ubyte *from = (fpme->fetch[j].ptr + - fpme->fetch[j].pitch * elt); - - fpme->fetch[j].fetch( from, attrib ); - fpme->fetch[j].emit( attrib, &out ); - } - } -} - - -/* We aren't running a vertex shader, but are running the pipeline. - * That means the vertices we need to build look like: - * - * dw0: vertex header (zero?) - * dw1: clip coord 0 - * dw2: clip coord 1 - * dw3: clip coord 2 - * dw4: clip coord 4 - * dw5: screen coord 0 - * dw6: screen coord 0 - * dw7: screen coord 0 - * dw8: screen coord 0 - * dw9: other attribs... - * - */ -static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned prim ) -{ - static const float zero = 0; - struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned i, nr = 0; - - fpme->prim = prim; - - /* Emit the vertex header and empty clipspace coord field: - */ - if (draw->user.edgeflag) { - fpme->header = header_ef; - } - else { - fpme->header = header; - } - - - /* Need to look at vertex shader inputs (we know it is a - * passthrough shader, so these define the outputs too). If we - * were running a shader, we'd still be looking at the inputs at - * this point. - */ - for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - enum pipe_format format = draw->vertex_element[i].src_format; - - fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset); - - fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch; - fpme->fetch[nr].fetch = draw_get_fetch_func( format ); - - /* Always do this -- somewhat redundant... - */ - fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT; - nr++; - } - - fpme->nr_fetch = nr; - fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); -} - - - - -static void fetch_pipeline_run( struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) -{ - struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - char *pipeline_verts; - - pipeline_verts = MALLOC( fpme->pipeline_vertex_size * - fetch_count ); - if (!pipeline_verts) { - assert(0); - return; - } - - - /* Single routine to fetch vertices and emit pipeline verts. - */ - fetch_store_general( fpme, - pipeline_verts, - fetch_elts, - fetch_count ); - - - /* Run the pipeline - */ - draw_pt_run_pipeline( fpme->draw, - fpme->prim, - pipeline_verts, - fpme->pipeline_vertex_size, - fetch_count, - draw_elts, - draw_count ); - - - /* Done -- that was easy, wasn't it: - */ - FREE( pipeline_verts ); -} - - - -static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) -{ - /* nothing to do */ -} - -static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) -{ - FREE(middle); -} - - -struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ) -{ - struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end ); - - fetch_pipeline->base.prepare = fetch_pipeline_prepare; - fetch_pipeline->base.run = fetch_pipeline_run; - fetch_pipeline->base.finish = fetch_pipeline_finish; - fetch_pipeline->base.destroy = fetch_pipeline_destroy; - - fetch_pipeline->draw = draw; - - return &fetch_pipeline->base; -} - diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c new file mode 100644 index 0000000000..4ec20493c4 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -0,0 +1,235 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "translate/translate.h" + + +struct fetch_pipeline_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct pt_emit *emit; + struct pt_fetch *fetch; + struct pt_post_vs *post_vs; + + unsigned vertex_data_offset; + unsigned vertex_size; + unsigned prim; + unsigned opt; +}; + + +static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vs = draw->vertex_shader; + + /* Add one to num_outputs because the pipeline occasionally tags on + * an additional texcoord, eg for AA lines. + */ + unsigned nr = MAX2( vs->info.num_inputs, + vs->info.num_outputs + 1 ); + + fpme->prim = prim; + fpme->opt = opt; + + /* Always leave room for the vertex header whether we need it or + * not. It's hard to get rid of it in particular because of the + * viewport code in draw_pt_post_vs.c. + */ + fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); + + + + draw_pt_fetch_prepare( fpme->fetch, + fpme->vertex_size ); + + /* XXX: it's not really gl rasterization rules we care about here, + * but gl vs dx9 clip spaces. + */ + draw_pt_post_vs_prepare( fpme->post_vs, + draw->bypass_clipping, + draw->identity_viewport, + draw->rasterizer->gl_rasterization_rules ); + + + if (!(opt & PT_PIPELINE)) + draw_pt_emit_prepare( fpme->emit, + prim ); + + /* No need to prepare the shader. + */ + vs->prepare(vs, draw); + +} + + + + +static void fetch_pipeline_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vertex_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align_int( fetch_count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run( fpme->fetch, + fetch_elts, + fetch_count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie a bypass shader, + * then the inputs == outputs, and are already in the correct + * place. + */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + fetch_count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + + FREE(pipeline_verts); +} + + + +static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + + if (fpme->fetch) + draw_pt_fetch_destroy( fpme->fetch ); + + if (fpme->emit) + draw_pt_emit_destroy( fpme->emit ); + + if (fpme->post_vs) + draw_pt_post_vs_destroy( fpme->post_vs ); + + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw ) +{ + struct fetch_pipeline_middle_end *fpme = CALLOC_STRUCT( fetch_pipeline_middle_end ); + if (!fpme) + goto fail; + + fpme->base.prepare = fetch_pipeline_prepare; + fpme->base.run = fetch_pipeline_run; + fpme->base.finish = fetch_pipeline_finish; + fpme->base.destroy = fetch_pipeline_destroy; + + fpme->draw = draw; + + fpme->fetch = draw_pt_fetch_create( draw ); + if (!fpme->fetch) + goto fail; + + fpme->post_vs = draw_pt_post_vs_create( draw ); + if (!fpme->post_vs) + goto fail; + + fpme->emit = draw_pt_emit_create( draw ); + if (!fpme->emit) + goto fail; + + return &fpme->base; + + fail: + if (fpme) + fetch_pipeline_destroy( &fpme->base ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c deleted file mode 100644 index e70e63d08f..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c +++ /dev/null @@ -1,168 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pt.h" - - -/** - * Add a point to the primitive queue. - * \param i0 index into user's vertex arrays - */ -static void do_point( struct draw_context *draw, - const char *v0 ) -{ - struct prim_header prim; - - prim.reset_line_stipple = 0; - prim.edgeflags = 1; - prim.pad = 0; - prim.v[0] = (struct vertex_header *)v0; - - draw->pipeline.first->point( draw->pipeline.first, &prim ); -} - - -/** - * Add a line to the primitive queue. - * \param i0 index into user's vertex arrays - * \param i1 index into user's vertex arrays - */ -static void do_line( struct draw_context *draw, - const char *v0, - const char *v1 ) -{ - struct prim_header prim; - - prim.reset_line_stipple = 1; /* fixme */ - prim.edgeflags = 1; - prim.pad = 0; - prim.v[0] = (struct vertex_header *)v0; - prim.v[1] = (struct vertex_header *)v1; - - draw->pipeline.first->line( draw->pipeline.first, &prim ); -} - -/** - * Add a triangle to the primitive queue. - */ -static void do_triangle( struct draw_context *draw, - char *v0, - char *v1, - char *v2 ) -{ - struct prim_header prim; - - prim.v[0] = (struct vertex_header *)v0; - prim.v[1] = (struct vertex_header *)v1; - prim.v[2] = (struct vertex_header *)v2; - prim.reset_line_stipple = 1; - prim.edgeflags = ((prim.v[0]->edgeflag) | - (prim.v[1]->edgeflag << 1) | - (prim.v[2]->edgeflag << 2)); - prim.pad = 0; - - if (0) debug_printf("tri ef: %d %d %d\n", - prim.v[0]->edgeflag, - prim.v[1]->edgeflag, - prim.v[2]->edgeflag); - - draw->pipeline.first->tri( draw->pipeline.first, &prim ); -} - - - -void draw_pt_reset_vertex_ids( struct draw_context *draw ) -{ - unsigned i; - char *verts = draw->pt.pipeline.verts; - unsigned stride = draw->pt.pipeline.vertex_stride; - - for (i = 0; i < draw->pt.pipeline.vertex_count; i++) { - ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; - verts += stride; - } -} - - -/* Code to run the pipeline on a fairly arbitary collection of vertices. - * - * Vertex headers must be pre-initialized with the - * UNDEFINED_VERTEX_ID, this code will cause that id to become - * overwritten, so it may have to be reset if there is the intention - * to reuse the vertices. - * - * This code provides a callback to reset the vertex id's which the - * draw_vbuf.c code uses when it has to perform a flush. - */ -void draw_pt_run_pipeline( struct draw_context *draw, - unsigned prim, - char *verts, - unsigned stride, - unsigned vertex_count, - const ushort *elts, - unsigned count ) -{ - unsigned i; - - draw->pt.pipeline.verts = verts; - draw->pt.pipeline.vertex_stride = stride; - draw->pt.pipeline.vertex_count = vertex_count; - - switch (prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) - do_point( draw, - verts + stride * elts[i] ); - break; - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) - do_line( draw, - verts + stride * elts[i+0], - verts + stride * elts[i+1]); - break; - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) - do_triangle( draw, - verts + stride * elts[i+0], - verts + stride * elts[i+1], - verts + stride * elts[i+2]); - break; - } - - draw->pt.pipeline.verts = NULL; - draw->pt.pipeline.vertex_count = 0; -} - diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c new file mode 100644 index 0000000000..c4a67c8289 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -0,0 +1,227 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_context.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + +struct pt_post_vs { + struct draw_context *draw; + + boolean (*run)( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ); +}; + + + +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + + + +static INLINE unsigned +compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0x0; + unsigned i; + +#if 0 + debug_printf("compute clipmask %f %f %f %f\n", + clip[0], clip[1], clip[2], clip[3]); + assert(clip[3] != 0.0); +#endif + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= (1<<0); + if ( clip[0] + clip[3] < 0) mask |= (1<<1); + if (-clip[1] + clip[3] < 0) mask |= (1<<2); + if ( clip[1] + clip[3] < 0) mask |= (1<<3); + if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */ + if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */ + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<<i); + } + + return mask; +} + + +/* The normal case - cliptest, rhw divide, viewport transform. + * + * Also handle identity viewport here at the expense of a few wasted + * instructions + */ +static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + struct vertex_header *out = vertices; + const float *scale = pvs->draw->viewport.scale; + const float *trans = pvs->draw->viewport.translate; + unsigned clipped = 0; + unsigned j; + + if (0) debug_printf("%s\n"); + + for (j = 0; j < count; j++) { + out->clip[0] = out->data[0][0]; + out->clip[1] = out->data[0][1]; + out->clip[2] = out->data[0][2]; + out->clip[3] = out->data[0][3]; + + out->vertex_id = 0xffff; + out->clipmask = compute_clipmask_gl(out->clip, + pvs->draw->plane, + pvs->draw->nr_planes); + clipped += out->clipmask; + + if (out->clipmask == 0) + { + /* divide by w */ + float w = 1.0f / out->data[0][3]; + + /* Viewport mapping */ + out->data[0][0] = out->data[0][0] * w * scale[0] + trans[0]; + out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1]; + out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2]; + out->data[0][3] = w; +#if 0 + debug_printf("post viewport: %f %f %f %f\n", + out->data[0][0], + out->data[0][1], + out->data[0][2], + out->data[0][3]); +#endif + } + + out = (struct vertex_header *)( (char *)out + stride ); + } + + return clipped != 0; +} + + + +/* If bypass_clipping is set, skip cliptest and rhw divide. + */ +static boolean post_vs_viewport( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + struct vertex_header *out = vertices; + const float *scale = pvs->draw->viewport.scale; + const float *trans = pvs->draw->viewport.translate; + unsigned j; + + if (0) debug_printf("%s\n", __FUNCTION__); + for (j = 0; j < count; j++) { + /* Viewport mapping only, no cliptest/rhw divide + */ + out->data[0][0] = out->data[0][0] * scale[0] + trans[0]; + out->data[0][1] = out->data[0][1] * scale[1] + trans[1]; + out->data[0][2] = out->data[0][2] * scale[2] + trans[2]; + + out = (struct vertex_header *)((char *)out + stride); + } + + return FALSE; +} + + +/* If bypass_clipping is set and we have an identity viewport, nothing + * to do. + */ +static boolean post_vs_none( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + if (0) debug_printf("%s\n", __FUNCTION__); + return FALSE; +} + +boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, + struct vertex_header *pipeline_verts, + unsigned count, + unsigned stride ) +{ + return pvs->run( pvs, pipeline_verts, count, stride ); +} + + +void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, + boolean bypass_clipping, + boolean identity_viewport, + boolean opengl ) +{ + if (bypass_clipping) { + if (identity_viewport) + pvs->run = post_vs_none; + else + pvs->run = post_vs_viewport; + } + else { + //if (opengl) + pvs->run = post_vs_cliptest_viewport_gl; + } +} + + +struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ) +{ + struct pt_post_vs *pvs = CALLOC_STRUCT( pt_post_vs ); + if (!pvs) + return NULL; + + pvs->draw = draw; + + return pvs; +} + +void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ) +{ + FREE(pvs); +} diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c new file mode 100644 index 0000000000..355093f945 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -0,0 +1,249 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_pt.h" + +#define FETCH_MAX 256 +#define DRAW_MAX (16*FETCH_MAX) + +struct varray_frontend { + struct draw_pt_front_end base; + struct draw_context *draw; + + ushort draw_elts[DRAW_MAX]; + unsigned fetch_elts[FETCH_MAX]; + + unsigned draw_count; + unsigned fetch_count; + + struct draw_pt_middle_end *middle; + + unsigned input_prim; + unsigned output_prim; +}; + +static void varray_flush(struct varray_frontend *varray) +{ + if (varray->draw_count) { +#if 0 + debug_printf("FLUSH fc = %d, dc = %d\n", + varray->fetch_count, + varray->draw_count); +#endif + varray->middle->run(varray->middle, + varray->fetch_elts, + varray->fetch_count, + varray->draw_elts, + varray->draw_count); + } + + varray->fetch_count = 0; + varray->draw_count = 0; +} + +#if 0 +static void varray_check_flush(struct varray_frontend *varray) +{ + if (varray->draw_count + 6 >= DRAW_MAX/* || + varray->fetch_count + 4 >= FETCH_MAX*/) { + varray_flush(varray); + } +} +#endif + +static INLINE void add_draw_el(struct varray_frontend *varray, + int idx, ushort flags) +{ + varray->draw_elts[varray->draw_count++] = idx | flags; +} + + +static INLINE void varray_triangle( struct varray_frontend *varray, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + add_draw_el(varray, i0, 0); + add_draw_el(varray, i1, 0); + add_draw_el(varray, i2, 0); +} + +static INLINE void varray_triangle_flags( struct varray_frontend *varray, + ushort flags, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + add_draw_el(varray, i0, flags); + add_draw_el(varray, i1, 0); + add_draw_el(varray, i2, 0); +} + +static INLINE void varray_line( struct varray_frontend *varray, + unsigned i0, + unsigned i1 ) +{ + add_draw_el(varray, i0, 0); + add_draw_el(varray, i1, 0); +} + + +static INLINE void varray_line_flags( struct varray_frontend *varray, + ushort flags, + unsigned i0, + unsigned i1 ) +{ + add_draw_el(varray, i0, flags); + add_draw_el(varray, i1, 0); +} + + +static INLINE void varray_point( struct varray_frontend *varray, + unsigned i0 ) +{ + add_draw_el(varray, i0, 0); +} + +static INLINE void varray_quad( struct varray_frontend *varray, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) +{ + varray_triangle( varray, i0, i1, i3 ); + varray_triangle( varray, i1, i2, i3 ); +} + +static INLINE void varray_ef_quad( struct varray_frontend *varray, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) +{ + const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; + const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; + + varray_triangle_flags( varray, + DRAW_PIPE_RESET_STIPPLE | omitEdge1, + i0, i1, i3 ); + + varray_triangle_flags( varray, + omitEdge2, + i1, i2, i3 ); +} + +/* At least for now, we're back to using a template include file for + * this. The two paths aren't too different though - it may be + * possible to reunify them. + */ +#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle_flags(vc,flags,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) varray_ef_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) varray_line_flags(vc,flags,i0,i1) +#define POINT(vc,i0) varray_point(vc,i0) +#define FUNC varray_run_extras +#include "draw_pt_varray_tmp.h" + +#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle(vc,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) varray_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1) +#define POINT(vc,i0) varray_point(vc,i0) +#define FUNC varray_run +#include "draw_pt_varray_tmp.h" + + + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + + +static void varray_prepare(struct draw_pt_front_end *frontend, + unsigned prim, + struct draw_pt_middle_end *middle, + unsigned opt) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + + if (opt & PT_PIPELINE) + { + varray->base.run = varray_run_extras; + } + else + { + varray->base.run = varray_run; + } + + varray->input_prim = prim; + varray->output_prim = reduced_prim[prim]; + + varray->middle = middle; + middle->prepare(middle, varray->output_prim, opt); +} + + + + +static void varray_finish(struct draw_pt_front_end *frontend) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + varray->middle->finish(varray->middle); + varray->middle = NULL; +} + +static void varray_destroy(struct draw_pt_front_end *frontend) +{ + FREE(frontend); +} + + +struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw) +{ + struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend); + if (varray == NULL) + return NULL; + + varray->base.prepare = varray_prepare; + varray->base.run = NULL; + varray->base.finish = varray_finish; + varray->base.destroy = varray_destroy; + varray->draw = draw; + + return &varray->base; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h new file mode 100644 index 0000000000..b9a319b253 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -0,0 +1,144 @@ + +static void FUNC(struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned count) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + struct draw_context *draw = varray->draw; + unsigned start = (unsigned)elts; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i, flags; + +#if 0 + debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count); +#endif +#if 0 + debug_printf("INPUT PRIM = %d (start = %d, count = %d)\n", varray->input_prim, + start, count); +#endif + + for (i = 0; i < count; ++i) { + varray->fetch_elts[i] = start + i; + } + varray->fetch_count = count; + + switch (varray->input_prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i ++) { + POINT(varray, i + 0); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE(varray, DRAW_PIPE_RESET_STIPPLE, + i + 0, i + 1); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + + for (i = 1; i < count; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + LINE(varray, flags, i - 1, 0); + } + break; + + case PIPE_PRIM_LINE_STRIP: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 1; i < count; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0, i + 1, i + 2); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0, i + 1 + (i&1), i + 2 - (i&1)); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0 + (i&1), i + 1 - (i&1), i + 2); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i+2 < count; i++) { + TRIANGLE(varray, flags, i + 1, i + 2, 0); + } + } + else { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i+2 < count; i++) { + TRIANGLE(varray, flags, 0, i + 1, i + 2); + } + } + } + break; + + case PIPE_PRIM_QUADS: + for (i = 0; i+3 < count; i += 4) { + QUAD(varray, i + 0, i + 1, i + 2, i + 3); + } + break; + + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i+3 < count; i += 2) { + QUAD(varray, i + 2, i + 0, i + 1, i + 3); + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; + const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + + for (i = 0; i+2 < count; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + TRIANGLE(varray, flags, i + 1, i + 2, 0); + } + } + break; + + default: + assert(0); + break; + } + + varray_flush(varray); +} + +#undef TRIANGLE +#undef QUAD +#undef POINT +#undef LINE +#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 107dcfc269..6b3fb1406b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -33,8 +33,6 @@ #include "pipe/p_util.h" #include "draw/draw_context.h" #include "draw/draw_private.h" -//#include "draw/draw_vbuf.h" -//#include "draw/draw_vertex.h" #include "draw/draw_pt.h" @@ -86,8 +84,9 @@ static void vcache_check_flush( struct vcache_frontend *vcache ) } -static void vcache_elt( struct vcache_frontend *vcache, - unsigned felt ) +static INLINE void vcache_elt( struct vcache_frontend *vcache, + unsigned felt, + ushort flags ) { unsigned idx = felt % CACHE_MAX; @@ -99,28 +98,9 @@ static void vcache_elt( struct vcache_frontend *vcache, vcache->fetch_elts[vcache->fetch_count++] = felt; } - vcache->draw_elts[vcache->draw_count++] = vcache->out[idx]; + vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags; } -static unsigned add_edgeflag( struct vcache_frontend *vcache, - unsigned idx, - unsigned mask ) -{ - if (mask && draw_get_edgeflag(vcache->draw, idx)) - return idx | DRAW_PT_EDGEFLAG; - else - return idx; -} - - -static unsigned add_reset_stipple( unsigned idx, - unsigned reset ) -{ - if (reset) - return idx | DRAW_PT_RESET_STIPPLE; - else - return idx; -} static void vcache_triangle( struct vcache_frontend *vcache, @@ -128,48 +108,42 @@ static void vcache_triangle( struct vcache_frontend *vcache, unsigned i1, unsigned i2 ) { - vcache_elt(vcache, i0 | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE); - vcache_elt(vcache, i1 | DRAW_PT_EDGEFLAG); - vcache_elt(vcache, i2 | DRAW_PT_EDGEFLAG); + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, i2, 0); vcache_check_flush(vcache); } -static void vcache_ef_triangle( struct vcache_frontend *vcache, - boolean reset_stipple, - unsigned ef_mask, - unsigned i0, - unsigned i1, - unsigned i2 ) +static void vcache_triangle_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1, + unsigned i2 ) { - i0 = add_edgeflag( vcache, i0, (ef_mask >> 0) & 1 ); - i1 = add_edgeflag( vcache, i1, (ef_mask >> 1) & 1 ); - i2 = add_edgeflag( vcache, i2, (ef_mask >> 2) & 1 ); - - i0 = add_reset_stipple( i0, reset_stipple ); - - vcache_elt(vcache, i0); - vcache_elt(vcache, i1); - vcache_elt(vcache, i2); + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, i2, 0); vcache_check_flush(vcache); - - if (0) debug_printf("emit tri ef: %d %d %d\n", - !!(i0 & DRAW_PT_EDGEFLAG), - !!(i1 & DRAW_PT_EDGEFLAG), - !!(i2 & DRAW_PT_EDGEFLAG)); - } - static void vcache_line( struct vcache_frontend *vcache, - boolean reset_stipple, unsigned i0, unsigned i1 ) { - i0 = add_reset_stipple( i0, reset_stipple ); + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_check_flush(vcache); +} + - vcache_elt(vcache, i0); - vcache_elt(vcache, i1); +static void vcache_line_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1 ) +{ + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, i1, 0); vcache_check_flush(vcache); } @@ -177,7 +151,7 @@ static void vcache_line( struct vcache_frontend *vcache, static void vcache_point( struct vcache_frontend *vcache, unsigned i0 ) { - vcache_elt(vcache, i0); + vcache_elt(vcache, i0, 0); vcache_check_flush(vcache); } @@ -197,237 +171,36 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, unsigned i2, unsigned i3 ) { - const unsigned omitEdge2 = ~(1 << 1); - const unsigned omitEdge3 = ~(1 << 2); - vcache_ef_triangle( vcache, 1, omitEdge2, i0, i1, i3 ); - vcache_ef_triangle( vcache, 0, omitEdge3, i1, i2, i3 ); -} + const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; + const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; + vcache_triangle_flags( vcache, + DRAW_PIPE_RESET_STIPPLE | omitEdge1, + i0, i1, i3 ); + vcache_triangle_flags( vcache, + omitEdge2, + i1, i2, i3 ); +} +/* At least for now, we're back to using a template include file for + * this. The two paths aren't too different though - it may be + * possible to reunify them. + */ +#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1) +#define POINT(vc,i0) vcache_point(vc,i0) +#define FUNC vcache_run_extras +#include "draw_pt_vcache_tmp.h" + +#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1) +#define POINT(vc,i0) vcache_point(vc,i0) +#define FUNC vcache_run +#include "draw_pt_vcache_tmp.h" -static void vcache_run( struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - unsigned count ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - struct draw_context *draw = vcache->draw; - - boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL); - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i; - -// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); - - switch (vcache->input_prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - vcache_point( vcache, - get_elt(elts, i + 0) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - vcache_line( vcache, - TRUE, - get_elt(elts, i + 0), - get_elt(elts, i + 1)); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - for (i = 1; i < count; i++) { - vcache_line( vcache, - i == 1, /* XXX: only if vb not split */ - get_elt(elts, i - 1), - get_elt(elts, i )); - } - - vcache_line( vcache, - 0, - get_elt(elts, count - 1), - get_elt(elts, 0 )); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - vcache_line( vcache, - i == 1, - get_elt(elts, i - 1), - get_elt(elts, i )); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (unfilled) { - for (i = 0; i+2 < count; i += 3) { - vcache_ef_triangle( vcache, - 1, - ~0, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - else { - for (i = 0; i+2 < count; i += 3) { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 2), - get_elt(elts, i + 1 )); - } - else { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - } - else { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - vcache_triangle( vcache, - get_elt(elts, i + 1), - get_elt(elts, i + 0), - get_elt(elts, i + 2 )); - } - else { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - vcache_triangle( vcache, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0 )); - } - } - else { - for (i = 0; i+2 < count; i++) { - vcache_triangle( vcache, - get_elt(elts, 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - if (unfilled) { - for (i = 0; i+3 < count; i += 4) { - vcache_ef_quad( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3)); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - vcache_quad( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3)); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - if (unfilled) { - for (i = 0; i+3 < count; i += 2) { - vcache_ef_quad( vcache, - get_elt(elts, i + 2), - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3)); - } - } - else { - for (i = 0; i+3 < count; i += 2) { - vcache_quad( vcache, - get_elt(elts, i + 2), - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3)); - } - } - break; - - case PIPE_PRIM_POLYGON: - if (unfilled) { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - const unsigned edge_first = (1<<2); - const unsigned edge_middle = (1<<0); - const unsigned edge_last = (1<<1); - - for (i = 0; i+2 < count; i++) { - unsigned ef_mask = edge_middle; - - if (i == 0) - ef_mask |= edge_first; - - if (i + 3 == count) - ef_mask |= edge_last; - - vcache_ef_triangle( vcache, - i == 0, - ef_mask, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0)); - } - } - else { - for (i = 0; i+2 < count; i++) { - vcache_triangle( vcache, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0)); - } - } - break; - - default: - assert(0); - break; - } - - vcache_flush( vcache ); -} @@ -448,23 +221,25 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { static void vcache_prepare( struct draw_pt_front_end *frontend, unsigned prim, - struct draw_pt_middle_end *middle ) + struct draw_pt_middle_end *middle, + unsigned opt ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; -/* - if (vcache->draw->rasterizer->flatshade_first) - vcache->base.run = vcache_run_pv0; - else - vcache->base.run = vcache_run_pv2; -*/ + if (opt & PT_PIPELINE) + { + vcache->base.run = vcache_run_extras; + } + else + { + vcache->base.run = vcache_run; + } - vcache->base.run = vcache_run; vcache->input_prim = prim; vcache->output_prim = reduced_prim[prim]; vcache->middle = middle; - middle->prepare( middle, vcache->output_prim ); + middle->prepare( middle, vcache->output_prim, opt ); } @@ -486,6 +261,8 @@ static void vcache_destroy( struct draw_pt_front_end *frontend ) struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ) { struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend ); + if (vcache == NULL) + return NULL; vcache->base.prepare = vcache_prepare; vcache->base.run = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h new file mode 100644 index 0000000000..cf9f394aa3 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -0,0 +1,174 @@ + + +static void FUNC( struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned count ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + struct draw_context *draw = vcache->draw; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i, flags; + + + switch (vcache->input_prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i ++) { + POINT( vcache, + get_elt(elts, i + 0) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE( vcache, + DRAW_PIPE_RESET_STIPPLE, + get_elt(elts, i + 0), + get_elt(elts, i + 1)); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + + for (i = 1; i < count; i++, flags = 0) { + LINE( vcache, + flags, + get_elt(elts, i - 1), + get_elt(elts, i )); + } + + LINE( vcache, + flags, + get_elt(elts, i - 1), + get_elt(elts, 0 )); + } + break; + + case PIPE_PRIM_LINE_STRIP: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 1; i < count; i++, flags = 0) { + LINE( vcache, + flags, + get_elt(elts, i - 1), + get_elt(elts, i )); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0), + get_elt(elts, i + 1 + (i&1)), + get_elt(elts, i + 2 - (i&1))); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0 + (i&1)), + get_elt(elts, i + 1 - (i&1)), + get_elt(elts, i + 2 )); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0 )); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } + } + } + break; + + + case PIPE_PRIM_QUADS: + for (i = 0; i+3 < count; i += 4) { + QUAD( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, i + 3)); + } + break; + + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i+3 < count; i += 2) { + QUAD( vcache, + get_elt(elts, i + 2), + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 3)); + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; + const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + + for (i = 0; i+2 < count; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + TRIANGLE( vcache, + flags, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0)); + } + } + break; + + default: + assert(0); + break; + } + + vcache_flush( vcache ); +} + + +#undef TRIANGLE +#undef QUAD +#undef POINT +#undef LINE +#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 970adc95e7..1446f785c5 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -52,9 +52,6 @@ draw_compute_vertex_size(struct vertex_info *vinfo) switch (vinfo->emit[i]) { case EMIT_OMIT: break; - case EMIT_HEADER: - vinfo->size += sizeof(struct vertex_header) / 4; - break; case EMIT_4UB: /* fall-through */ case EMIT_1F_PSIZE: @@ -71,12 +68,62 @@ draw_compute_vertex_size(struct vertex_info *vinfo) case EMIT_4F: vinfo->size += 4; break; - case EMIT_ALL: - /* fall-through */ default: assert(0); } } +} + + +void +draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) +{ + unsigned i, j; - assert(vinfo->size * 4 <= MAX_VERTEX_SIZE); + for (i = 0; i < vinfo->num_attribs; i++) { + j = vinfo->src_index[i]; + switch (vinfo->emit[i]) { + case EMIT_OMIT: + debug_printf("EMIT_OMIT:"); + break; + case EMIT_1F: + debug_printf("EMIT_1F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_1F_PSIZE: + debug_printf("EMIT_1F_PSIZE:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_2F: + debug_printf("EMIT_2F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_3F: + debug_printf("EMIT_3F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + data += sizeof(float); + break; + case EMIT_4F: + debug_printf("EMIT_4F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_4UB: + debug_printf("EMIT_4UB:\t"); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + break; + default: + assert(0); + } + debug_printf("\n"); + } + debug_printf("\n"); } diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index abd2017ed3..6d8bac5138 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -47,8 +47,6 @@ */ enum attrib_emit { EMIT_OMIT, /**< don't emit the attribute */ - EMIT_ALL, /**< emit whole post-xform vertex, w/ header */ - EMIT_HEADER, /**< emit vertex_header struct (XXX temp?) */ EMIT_1F, EMIT_1F_PSIZE, /**< insert constant point size */ EMIT_2F, @@ -108,5 +106,7 @@ draw_emit_vertex_attr(struct vertex_info *vinfo, extern void draw_compute_vertex_size(struct vertex_info *vinfo); +void draw_dump_emitted_vertex(const struct vertex_info *vinfo, + const uint8_t *data); #endif /* DRAW_VERTEX_H */ diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c deleted file mode 100644 index c0248979e2..0000000000 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ /dev/null @@ -1,208 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "draw_private.h" -#include "draw_context.h" - - -void draw_vertex_cache_invalidate( struct draw_context *draw ) -{ - assert(draw->pq.queue_nr == 0); - assert(draw->vs.queue_nr == 0); - assert(draw->vcache.referenced == 0); - - /* There's an error somewhere in the vcache code that requires this - * memset. The bug is exposed in q3demo demo001, but probably - * elsewhere as well. Will track it down later. - */ - memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); -} - - -/** - * Check if vertex is in cache, otherwise add it. It won't go through - * VS yet, not until there is a flush operation or the VS queue fills up. - * - * Note that cache entries are basically just two pointers: the first - * an index into the user's vertex arrays, the second a location in - * the vertex shader cache for the post-transformed vertex. - * - * \return pointer to location of (post-transformed) vertex header in the cache - */ -static struct vertex_header *get_vertex( struct draw_context *draw, - unsigned i ) -{ - unsigned slot = (i + (i>>5)) % VCACHE_SIZE; - - assert(slot < 32); /* so we don't exceed the bitfield size below */ - - if (draw->vcache.referenced & (1<<slot)) - { - /* Cache hit? - */ - if (draw->vcache.idx[slot].in == i) { -// _mesa_printf("HIT %d %d\n", slot, i); - assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); - return draw->vs.queue[draw->vcache.idx[slot].out].vertex; - } - - /* Otherwise a collision - */ - slot = VCACHE_SIZE + draw->vcache.overflow++; -// _mesa_printf("XXX %d --> %d\n", i, slot); - } - - /* Deal with the cache miss: - */ - { - unsigned out; - - assert(slot < Elements(draw->vcache.idx)); - -// _mesa_printf("NEW %d %d\n", slot, i); - draw->vcache.idx[slot].in = i; - draw->vcache.idx[slot].out = out = draw->vs.queue_nr++; - draw->vcache.referenced |= (1 << slot); - - - /* Add to vertex shader queue: - */ - assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - - draw->vs.queue[out].elt = i; - draw->vs.queue[out].vertex->clipmask = 0; - draw->vs.queue[out].vertex->edgeflag = draw_get_edgeflag(draw, i); - draw->vs.queue[out].vertex->pad = 0; - draw->vs.queue[out].vertex->vertex_id = UNDEFINED_VERTEX_ID; - - /* Need to set the vertex's edge flag here. If we're being called - * by do_ef_triangle(), that function needs edge flag info! - */ - - return draw->vs.queue[draw->vcache.idx[slot].out].vertex; - } -} - - -static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const unsigned *elts = (const unsigned *) draw->user.elts; - return get_vertex( draw, elts[i] ); -} - - -static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const ushort *elts = (const ushort *) draw->user.elts; - return get_vertex( draw, elts[i] ); -} - - -static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const ubyte *elts = (const ubyte *) draw->user.elts; - return get_vertex( draw, elts[i] ); -} - - -void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) -{ - unsigned i; - - for (i = 0; i < draw->vs.post_nr; i++) - draw->vs.queue[i].vertex->vertex_id = UNDEFINED_VERTEX_ID; -} - - -void draw_vertex_cache_unreference( struct draw_context *draw ) -{ - draw->vcache.referenced = 0; - draw->vcache.overflow = 0; -} - - -int draw_vertex_cache_check_space( struct draw_context *draw, - unsigned nr_verts ) -{ - if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) { - /* The vs queue is sized so that this can never happen: - */ - assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH); - return TRUE; - } - else - return FALSE; -} - - - -/** - * Tell the drawing context about the index/element buffer to use - * (ala glDrawElements) - * If no element buffer is to be used (i.e. glDrawArrays) then this - * should be called with eltSize=0 and elements=NULL. - * - * \param draw the drawing context - * \param eltSize size of each element (1, 2 or 4 bytes) - * \param elements the element buffer ptr - */ -void -draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, void *elements ) -{ -// draw_statechange( draw ); - - /* choose the get_vertex() function to use */ - switch (eltSize) { - case 0: - draw->vcache.get_vertex = get_vertex; - break; - case 1: - draw->vcache.get_vertex = get_ubyte_elt_vertex; - break; - case 2: - draw->vcache.get_vertex = get_ushort_elt_vertex; - break; - case 4: - draw->vcache.get_vertex = get_uint_elt_vertex; - break; - default: - assert(0); - } - draw->user.elts = elements; - draw->user.eltSize = eltSize; -} - diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c deleted file mode 100644 index 9041041006..0000000000 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ /dev/null @@ -1,528 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" -#include "draw_context.h" - - -#define DRAW_DBG 0 - - -/** - * Fetch a float[4] vertex attribute from memory, doing format/type - * conversion as needed. - * - * This is probably needed/dupliocated elsewhere, eg format - * conversion, texture sampling etc. - */ -#define FETCH_ATTRIB( NAME, SZ, CVT ) \ -static void \ -fetch_##NAME(const void *ptr, float *attrib) \ -{ \ - static const float defaults[4] = { 0,0,0,1 }; \ - int i; \ - \ - for (i = 0; i < SZ; i++) { \ - attrib[i] = CVT(i); \ - } \ - \ - for (; i < 4; i++) { \ - attrib[i] = defaults[i]; \ - } \ -} - -#define CVT_64_FLOAT(i) (float) ((double *) ptr)[i] -#define CVT_32_FLOAT(i) ((float *) ptr)[i] - -#define CVT_8_USCALED(i) (float) ((unsigned char *) ptr)[i] -#define CVT_16_USCALED(i) (float) ((unsigned short *) ptr)[i] -#define CVT_32_USCALED(i) (float) ((unsigned int *) ptr)[i] - -#define CVT_8_SSCALED(i) (float) ((char *) ptr)[i] -#define CVT_16_SSCALED(i) (float) ((short *) ptr)[i] -#define CVT_32_SSCALED(i) (float) ((int *) ptr)[i] - -#define CVT_8_UNORM(i) (float) ((unsigned char *) ptr)[i] / 255.0f -#define CVT_16_UNORM(i) (float) ((unsigned short *) ptr)[i] / 65535.0f -#define CVT_32_UNORM(i) (float) ((unsigned int *) ptr)[i] / 4294967295.0f - -#define CVT_8_SNORM(i) (float) ((char *) ptr)[i] / 127.0f -#define CVT_16_SNORM(i) (float) ((short *) ptr)[i] / 32767.0f -#define CVT_32_SNORM(i) (float) ((int *) ptr)[i] / 2147483647.0f - -FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) -FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) -FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT ) -FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT ) - -FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT ) -FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT ) -FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT ) -FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT ) - -FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED ) -FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED ) -FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED ) -FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED ) - -FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED ) -FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED ) -FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED ) -FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED ) - -FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM ) -FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM ) -FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM ) -FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM ) - -FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM ) -FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM ) -FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM ) -FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM ) - -FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED ) -FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED ) -FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED ) -FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED ) - -FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED ) -FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED ) -FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED ) -FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED ) - -FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM ) -FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM ) -FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM ) -FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM ) - -FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM ) -FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM ) -FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM ) -FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM ) - -FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED ) -FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED ) -FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED ) -FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED ) - -FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED ) -FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED ) -FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED ) -FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED ) - -FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) -FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM ) -FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM ) -FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM ) - -FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM ) -FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM ) -FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM ) -FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM ) - -FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) -//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) - - - -static void -fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) -{ - attrib[2] = CVT_8_UNORM(0); - attrib[1] = CVT_8_UNORM(1); - attrib[0] = CVT_8_UNORM(2); - attrib[3] = CVT_8_UNORM(3); -} - - -fetch_func draw_get_fetch_func( enum pipe_format format ) -{ -#if 0 - { - char tmp[80]; - pf_sprint_name(tmp, format); - debug_printf("%s: %s\n", __FUNCTION__, tmp); - } -#endif - - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return fetch_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return fetch_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return fetch_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return fetch_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return fetch_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return fetch_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return fetch_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return fetch_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return fetch_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return fetch_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return fetch_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return fetch_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return fetch_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return fetch_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return fetch_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return fetch_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return fetch_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return fetch_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return fetch_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return fetch_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return fetch_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return fetch_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return fetch_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return fetch_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return fetch_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return fetch_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return fetch_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return fetch_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return fetch_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return fetch_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return fetch_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return fetch_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return fetch_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return fetch_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return fetch_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return fetch_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return fetch_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return fetch_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return fetch_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return fetch_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return fetch_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return fetch_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return fetch_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return fetch_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return fetch_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return fetch_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return fetch_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return fetch_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return fetch_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return fetch_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return fetch_R8G8B8A8_SSCALED; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; - - - case PIPE_FORMAT_B8G8R8A8_UNORM: - return fetch_B8G8R8A8_UNORM; - - case 0: - return NULL; /* not sure why this is needed */ - - default: - /* This can get hit because draw-state-validation is too eager, - and can jump in here validating stuff before the state tracker has set - up everything. - */ - /* assert(0); */ - return NULL; - } -} - - -static void -transpose_4x4( float *out, const float *in ) -{ - /* This can be achieved in 12 sse instructions, plus the final - * stores I guess. This is probably a bit more than that - maybe - * 32 or so? - */ - out[0] = in[0]; out[1] = in[4]; out[2] = in[8]; out[3] = in[12]; - out[4] = in[1]; out[5] = in[5]; out[6] = in[9]; out[7] = in[13]; - out[8] = in[2]; out[9] = in[6]; out[10] = in[10]; out[11] = in[14]; - out[12] = in[3]; out[13] = in[7]; out[14] = in[11]; out[15] = in[15]; -} - - - -static void fetch_xyz_rgb( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) -{ - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - int i; - - assert(count <= 4); - -// debug_printf("%s\n", __FUNCTION__); - - /* loop over vertex attributes (vertex shader inputs) - */ - - for (i = 0; i < 4; i++) { - { - const float *in = (const float *)(src[0] + elts[i] * pitch[0]); - float *out = &machine->Inputs[0].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - - { - const float *in = (const float *)(src[1] + elts[i] * pitch[1]); - float *out = &machine->Inputs[1].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - } -} - - - - -static void fetch_xyz_rgb_st( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) -{ - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - int i; - - assert(count <= 4); - - /* loop over vertex attributes (vertex shader inputs) - */ - - for (i = 0; i < 4; i++) { - { - const float *in = (const float *)(src[0] + elts[i] * pitch[0]); - float *out = &machine->Inputs[0].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - - { - const float *in = (const float *)(src[1] + elts[i] * pitch[1]); - float *out = &machine->Inputs[1].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - - { - const float *in = (const float *)(src[2] + elts[i] * pitch[2]); - float *out = &machine->Inputs[2].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = 0.0f; - out[12] = 1.0f; - } - } -} - - - - -/** - * Fetch vertex attributes for 'count' vertices. - */ -static void generic_vertex_fetch( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) -{ - unsigned nr_attrs = draw->vertex_fetch.nr_attrs; - unsigned attr; - - assert(count <= 4); - -// debug_printf("%s %d\n", __FUNCTION__, count); - - /* loop over vertex attributes (vertex shader inputs) - */ - for (attr = 0; attr < nr_attrs; attr++) { - - const unsigned pitch = draw->vertex_fetch.pitch[attr]; - const ubyte *src = draw->vertex_fetch.src_ptr[attr]; - const fetch_func fetch = draw->vertex_fetch.fetch[attr]; - unsigned i; - float p[4][4]; - - - /* Fetch four attributes for four vertices. - * - * Could fetch directly into AOS format, but this is meant to be - * a prototype for an sse implementation, which would have - * difficulties doing that. - */ - for (i = 0; i < count; i++) - fetch( src + elts[i] * pitch, p[i] ); - - /* Be nice and zero out any missing vertices: - */ - for ( ; i < 4; i++) - p[i][0] = p[i][1] = p[i][2] = p[i][3] = 0; - - /* Transpose/swizzle into sse-friendly format. Currently - * assuming that all vertex shader inputs are float[4], but this - * isn't true -- if the vertex shader only wants tex0.xy, we - * could optimize for that. - * - * To do so fully without codegen would probably require an - * excessive number of fetch functions, but we could at least - * minimize the transpose step: - */ - transpose_4x4( (float *)&machine->Inputs[attr].xyzw[0].f[0], (float *)p ); - } -} - - - -void draw_update_vertex_fetch( struct draw_context *draw ) -{ - unsigned nr_attrs, i; - -// debug_printf("%s\n", __FUNCTION__); - - /* this may happend during context init */ - if (!draw->vertex_shader) - return; - - nr_attrs = draw->vertex_shader->info.num_inputs; - - for (i = 0; i < nr_attrs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - enum pipe_format format = draw->vertex_element[i].src_format; - - draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset; - - draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; - draw->vertex_fetch.fetch[i] = draw_get_fetch_func( format ); - } - - draw->vertex_fetch.nr_attrs = nr_attrs; - - draw->vertex_fetch.fetch_func = generic_vertex_fetch; - - switch (nr_attrs) { - case 2: - if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && - draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT) - draw->vertex_fetch.fetch_func = fetch_xyz_rgb; - break; - case 3: - if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && - draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT && - draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT) - draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st; - break; - default: - break; - } - -} diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c deleted file mode 100644 index 7bb34ace7a..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ /dev/null @@ -1,432 +0,0 @@ -/* - * Copyright 2003 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell <keithw@tungstengraphics.com> - */ - - -#include <stddef.h> - -#include "pipe/p_compiler.h" -#include "pipe/p_util.h" -#include "rtasm/rtasm_execmem.h" - -#include "draw_vf.h" - - -#define DRAW_VF_DBG 0 - - -static boolean match_fastpath( struct draw_vertex_fetch *vf, - const struct draw_vf_fastpath *fp) -{ - unsigned j; - - if (vf->attr_count != fp->attr_count) - return FALSE; - - for (j = 0; j < vf->attr_count; j++) - if (vf->attr[j].format != fp->attr[j].format || - vf->attr[j].inputsize != fp->attr[j].size || - vf->attr[j].vertoffset != fp->attr[j].offset) - return FALSE; - - if (fp->match_strides) { - if (vf->vertex_stride != fp->vertex_stride) - return FALSE; - - for (j = 0; j < vf->attr_count; j++) - if (vf->attr[j].inputstride != fp->attr[j].stride) - return FALSE; - } - - return TRUE; -} - -static boolean search_fastpath_emit( struct draw_vertex_fetch *vf ) -{ - struct draw_vf_fastpath *fp = vf->fastpath; - - for ( ; fp ; fp = fp->next) { - if (match_fastpath(vf, fp)) { - vf->emit = fp->func; - return TRUE; - } - } - - return FALSE; -} - -void draw_vf_register_fastpath( struct draw_vertex_fetch *vf, - boolean match_strides ) -{ - struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath); - unsigned i; - - fastpath->vertex_stride = vf->vertex_stride; - fastpath->attr_count = vf->attr_count; - fastpath->match_strides = match_strides; - fastpath->func = vf->emit; - fastpath->attr = (struct draw_vf_attr_type *) - MALLOC(vf->attr_count * sizeof(fastpath->attr[0])); - - for (i = 0; i < vf->attr_count; i++) { - fastpath->attr[i].format = vf->attr[i].format; - fastpath->attr[i].stride = vf->attr[i].inputstride; - fastpath->attr[i].size = vf->attr[i].inputsize; - fastpath->attr[i].offset = vf->attr[i].vertoffset; - } - - fastpath->next = vf->fastpath; - vf->fastpath = fastpath; -} - - - - -/*********************************************************************** - * Build codegen functions or return generic ones: - */ -static void choose_emit_func( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *dest) -{ - vf->emit = NULL; - - /* Does this match an existing (hardwired, codegen or known-bad) - * fastpath? - */ - if (search_fastpath_emit(vf)) { - /* Use this result. If it is null, then it is already known - * that the current state will fail for codegen and there is no - * point trying again. - */ - } - else if (vf->codegen_emit) { - vf->codegen_emit( vf ); - } - - if (!vf->emit) { - draw_vf_generate_hardwired_emit(vf); - } - - /* Otherwise use the generic version: - */ - if (!vf->emit) - vf->emit = draw_vf_generic_emit; - - vf->emit( vf, count, dest ); -} - - - - - -/*********************************************************************** - * Public entrypoints, mostly dispatch to the above: - */ - - - -static unsigned -draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, - const struct draw_vf_attr_map *map, - unsigned nr, - unsigned vertex_stride ) -{ - unsigned offset = 0; - unsigned i, j; - - assert(nr < PIPE_MAX_ATTRIBS); - - for (j = 0, i = 0; i < nr; i++) { - const unsigned format = map[i].format; - if (format == DRAW_EMIT_PAD) { -#if (DRAW_VF_DBG) - debug_printf("%d: pad %d, offset %d\n", i, - map[i].offset, offset); -#endif - - offset += map[i].offset; - - } - else { - vf->attr[j].attrib = map[i].attrib; - vf->attr[j].format = format; - vf->attr[j].insert = draw_vf_format_info[format].insert; - vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize; - vf->attr[j].vertoffset = offset; - vf->attr[j].isconst = draw_vf_format_info[format].isconst; - if(vf->attr[j].isconst) - memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize); - -#if (DRAW_VF_DBG) - debug_printf("%d: %s, offset %d\n", i, - draw_vf_format_info[format].name, - vf->attr[j].vertoffset); -#endif - - offset += draw_vf_format_info[format].attrsize; - j++; - } - } - - vf->attr_count = j; - vf->vertex_stride = vertex_stride ? vertex_stride : offset; - vf->emit = choose_emit_func; - - assert(vf->vertex_stride >= offset); - return vf->vertex_stride; -} - - -void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, - const struct vertex_info *vinfo, - float point_size ) -{ - unsigned i, j, k; - struct draw_vf_attr *a = vf->attr; - struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS]; - unsigned count = 0; /* for debug/sanity */ - unsigned nr_attrs = 0; - - for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - /* no-op */ - break; - case EMIT_ALL: { - /* just copy the whole vertex as-is to the vbuf */ - unsigned s = vinfo->size; - assert(i == 0); - assert(j == 0); - /* copy the vertex header */ - /* XXX: we actually don't copy the header, just pad it */ - attrs[nr_attrs].attrib = 0; - attrs[nr_attrs].format = DRAW_EMIT_PAD; - attrs[nr_attrs].offset = offsetof(struct vertex_header, data); - s -= offsetof(struct vertex_header, data)/4; - count += offsetof(struct vertex_header, data)/4; - nr_attrs++; - /* copy the vertex data */ - for(k = 0; k < (s & ~0x3); k += 4) { - attrs[nr_attrs].attrib = k/4; - attrs[nr_attrs].format = DRAW_EMIT_4F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 4; - } - /* tail */ - /* XXX: actually, this shouldn't be needed */ - attrs[nr_attrs].attrib = k/4; - attrs[nr_attrs].offset = 0; - switch(s & 0x3) { - case 0: - break; - case 1: - attrs[nr_attrs].format = DRAW_EMIT_1F; - nr_attrs++; - count += 1; - break; - case 2: - attrs[nr_attrs].format = DRAW_EMIT_2F; - nr_attrs++; - count += 2; - break; - case 3: - attrs[nr_attrs].format = DRAW_EMIT_3F; - nr_attrs++; - count += 3; - break; - } - break; - } - case EMIT_HEADER: - /* XXX emit new DRAW_EMIT_HEADER attribute??? */ - attrs[nr_attrs].attrib = 0; - attrs[nr_attrs].format = DRAW_EMIT_PAD; - attrs[nr_attrs].offset = offsetof(struct vertex_header, data); - count += offsetof(struct vertex_header, data)/4; - nr_attrs++; - break; - case EMIT_1F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_1F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count++; - break; - case EMIT_1F_PSIZE: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_1F_CONST; - attrs[nr_attrs].offset = 0; - attrs[nr_attrs].data.f[0] = point_size; - nr_attrs++; - count++; - break; - case EMIT_2F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_2F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 2; - break; - case EMIT_3F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_3F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 3; - break; - case EMIT_4F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_4F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 4; - break; - case EMIT_4UB: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_4UB_4F_BGRA; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 1; - break; - default: - assert(0); - } - } - - assert(count == vinfo->size); - - draw_vf_set_vertex_attributes(vf, - attrs, - nr_attrs, - vinfo->size * sizeof(float) ); - - for (j = 0; j < vf->attr_count; j++) { - a[j].inputsize = 4; - a[j].do_insert = a[j].insert[4 - 1]; - if(a[j].isconst) { - a[j].inputptr = a[j].data; - a[j].inputstride = 0; - } - } -} - - -#if 0 -/* Set attribute pointers, adjusted for start position: - */ -void draw_vf_set_sources( struct draw_vertex_fetch *vf, - GLvector4f * const sources[], - unsigned start ) -{ - struct draw_vf_attr *a = vf->attr; - unsigned j; - - for (j = 0; j < vf->attr_count; j++) { - const GLvector4f *vptr = sources[a[j].attrib]; - - if ((a[j].inputstride != vptr->stride) || - (a[j].inputsize != vptr->size)) - vf->emit = choose_emit_func; - - a[j].inputstride = vptr->stride; - a[j].inputsize = vptr->size; - a[j].do_insert = a[j].insert[vptr->size - 1]; - a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride; - } -} -#endif - - -/** - * Emit a vertex to dest. - */ -void draw_vf_emit_vertex( struct draw_vertex_fetch *vf, - struct vertex_header *vertex, - void *dest ) -{ - struct draw_vf_attr *a = vf->attr; - unsigned j; - - for (j = 0; j < vf->attr_count; j++) { - if (!a[j].isconst) { - a[j].inputptr = (uint8_t *)&vertex->data[a[j].attrib][0]; - a[j].inputstride = 0; /* XXX: one-vertex-max ATM */ - } - } - - vf->emit( vf, 1, (uint8_t*) dest ); -} - - - -struct draw_vertex_fetch *draw_vf_create( void ) -{ - struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch); - unsigned i; - - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) - vf->attr[i].vf = vf; - - vf->identity[0] = 0.0; - vf->identity[1] = 0.0; - vf->identity[2] = 0.0; - vf->identity[3] = 1.0; - - vf->codegen_emit = NULL; - -#ifdef USE_SSE_ASM - if (!GETENV("GALLIUM_NO_CODEGEN")) - vf->codegen_emit = draw_vf_generate_sse_emit; -#endif - - return vf; -} - - -void draw_vf_destroy( struct draw_vertex_fetch *vf ) -{ - struct draw_vf_fastpath *fp, *tmp; - - for (fp = vf->fastpath ; fp ; fp = tmp) { - tmp = fp->next; - FREE(fp->attr); - - /* KW: At the moment, fp->func is constrained to be allocated by - * rtasm_exec_alloc(), as the hardwired fastpaths in - * t_vertex_generic.c are handled specially. It would be nice - * to unify them, but this probably won't change until this - * module gets another overhaul. - */ - //rtasm_exec_free((void *) fp->func); - FREE(fp); - } - - vf->fastpath = NULL; - FREE(vf); -} diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h deleted file mode 100644 index 7555d1bd58..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf.h +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright 2008 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -/** - * Vertex fetch/store/convert code. This functionality is used in two places: - * 1. Vertex fetch/convert - to grab vertex data from incoming vertex - * arrays and convert to format needed by vertex shaders. - * 2. Vertex store/emit - to convert simple float[][4] vertex attributes - * (which is the organization used throughout the draw/prim pipeline) to - * hardware-specific formats and emit into hardware vertex buffers. - * - * - * Authors: - * Keith Whitwell <keithw@tungstengraphics.com> - */ - -#ifndef DRAW_VF_H -#define DRAW_VF_H - - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" - -#include "draw_vertex.h" -#include "draw_private.h" /* for vertex_header */ - - -enum draw_vf_attr_format { - DRAW_EMIT_1F, - DRAW_EMIT_2F, - DRAW_EMIT_3F, - DRAW_EMIT_4F, - DRAW_EMIT_3F_XYW, /**< for projective texture */ - DRAW_EMIT_1UB_1F, /**< for fog coordinate */ - DRAW_EMIT_3UB_3F_RGB, /**< for specular color */ - DRAW_EMIT_3UB_3F_BGR, /**< for specular color */ - DRAW_EMIT_4UB_4F_RGBA, /**< for color */ - DRAW_EMIT_4UB_4F_BGRA, /**< for color */ - DRAW_EMIT_4UB_4F_ARGB, /**< for color */ - DRAW_EMIT_4UB_4F_ABGR, /**< for color */ - DRAW_EMIT_1F_CONST, - DRAW_EMIT_2F_CONST, - DRAW_EMIT_3F_CONST, - DRAW_EMIT_4F_CONST, - DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */ - DRAW_EMIT_MAX -}; - -struct draw_vf_attr_map -{ - /** Input attribute number */ - unsigned attrib; - - enum draw_vf_attr_format format; - - unsigned offset; - - /** - * Constant data for DRAW_EMIT_*_CONST - */ - union { - uint8_t ub[4]; - float f[4]; - } data; -}; - -struct draw_vertex_fetch; - - - -#if 0 -unsigned -draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, - const struct draw_vf_attr_map *map, - unsigned nr, - unsigned vertex_stride ); -#endif - -void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, - const struct vertex_info *vinfo, - float point_size ); - -#if 0 -void -draw_vf_set_sources( struct draw_vertex_fetch *vf, - GLvector4f * const attrib[], - unsigned start ); -#endif - -void -draw_vf_emit_vertex( struct draw_vertex_fetch *vf, - struct vertex_header *vertex, - void *dest ); - -struct draw_vertex_fetch * -draw_vf_create( void ); - -void -draw_vf_destroy( struct draw_vertex_fetch *vf ); - - - -/*********************************************************************** - * Internal functions and structs: - */ - -struct draw_vf_attr; - -typedef void (*draw_vf_extract_func)( const struct draw_vf_attr *a, - float *out, - const uint8_t *v ); - -typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a, - uint8_t *v, - const float *in ); - -typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *dest ); - - - -/** - * Describes how to convert/move a vertex attribute from a vertex - * array to a vertex structure. - */ -struct draw_vf_attr -{ - struct draw_vertex_fetch *vf; - - unsigned format; - unsigned inputsize; - unsigned inputstride; - unsigned vertoffset; /**< position of the attrib in the vertex struct */ - - boolean isconst; /**< read from const data below */ - uint8_t data[16]; - - unsigned attrib; /**< which vertex attrib (0=position, etc) */ - unsigned vertattrsize; /**< size of the attribute in bytes */ - - uint8_t *inputptr; - const draw_vf_insert_func *insert; - draw_vf_insert_func do_insert; - draw_vf_extract_func extract; -}; - -struct draw_vertex_fetch -{ - struct draw_vf_attr attr[PIPE_MAX_ATTRIBS]; - unsigned attr_count; - unsigned vertex_stride; - - draw_vf_emit_func emit; - - /* Parameters and constants for codegen: - */ - float identity[4]; - - struct draw_vf_fastpath *fastpath; - - void (*codegen_emit)( struct draw_vertex_fetch *vf ); -}; - - -struct draw_vf_attr_type { - unsigned format; - unsigned size; - unsigned stride; - unsigned offset; -}; - -/** XXX this could be moved into draw_vf.c */ -struct draw_vf_fastpath { - unsigned vertex_stride; - unsigned attr_count; - boolean match_strides; - - struct draw_vf_attr_type *attr; - - draw_vf_emit_func func; - struct draw_vf_fastpath *next; -}; - - -void -draw_vf_register_fastpath( struct draw_vertex_fetch *vtx, - boolean match_strides ); - -void -draw_vf_generic_emit( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *v ); - -void -draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ); - -void -draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ); - - -/** XXX this type and function could probably be moved into draw_vf.c */ -struct draw_vf_format_info { - const char *name; - draw_vf_insert_func insert[4]; - const unsigned attrsize; - const boolean isconst; -}; - -extern const struct draw_vf_format_info -draw_vf_format_info[DRAW_EMIT_MAX]; - - -#endif diff --git a/src/gallium/auxiliary/draw/draw_vf_generic.c b/src/gallium/auxiliary/draw/draw_vf_generic.c deleted file mode 100644 index 7a60a9db9c..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf_generic.c +++ /dev/null @@ -1,585 +0,0 @@ - -/* - * Copyright 2003 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell <keithw@tungstengraphics.com> - */ - - -#include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_util.h" - -#include "draw_vf.h" - - - -static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; -} - -static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = 1; -} - -static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = 0; - out[3] = 1; -} - -static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = 0; - out[2] = 0; - out[3] = 1; -} - -static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[3]; -} - -static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - (void) a; (void) v; (void) in; - assert(0); -} - -static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; -} - -static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = 0; -} - -static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = 0; - out[2] = 0; -} - - -static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; -} - -static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = 0; -} - -static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; -} - -static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - (void) a; (void) v; (void) in; -} - -static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); -} - -static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[2] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - v[1] = 0; - v[2] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); -} - -static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[0] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - v[1] = 0; - v[0] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); -} - -static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - v[3] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - v[2] = 0x00; - v[3] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); -} - -static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - v[1] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - v[2] = 0x00; - v[1] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); -} - -static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[2] = 0; -} - -static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - v[1] = 0; - v[2] = 0; -} - -static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); -} - -static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[0] = 0; -} - -static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - v[1] = 0; - v[0] = 0; -} - - -static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); -} - - -const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] = -{ - { "1f", - { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, - sizeof(float), FALSE }, - - { "2f", - { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, - 2 * sizeof(float), FALSE }, - - { "3f", - { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, - 3 * sizeof(float), FALSE }, - - { "4f", - { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, - 4 * sizeof(float), FALSE }, - - { "3f_xyw", - { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err, - insert_3f_xyw_4 }, - 3 * sizeof(float), FALSE }, - - { "1ub_1f", - { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 }, - sizeof(uint8_t), FALSE }, - - { "3ub_3f_rgb", - { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3, - insert_3ub_3f_rgb_3 }, - 3 * sizeof(uint8_t), FALSE }, - - { "3ub_3f_bgr", - { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3, - insert_3ub_3f_bgr_3 }, - 3 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_rgba", - { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3, - insert_4ub_4f_rgba_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_bgra", - { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3, - insert_4ub_4f_bgra_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_argb", - { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3, - insert_4ub_4f_argb_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_abgr", - { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3, - insert_4ub_4f_abgr_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "1f_const", - { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, - sizeof(float), TRUE }, - - { "2f_const", - { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, - 2 * sizeof(float), TRUE }, - - { "3f_const", - { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, - 3 * sizeof(float), TRUE }, - - { "4f_const", - { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, - 4 * sizeof(float), TRUE }, - - { "pad", - { NULL, NULL, NULL, NULL }, - 0, FALSE }, - -}; - - - - -/*********************************************************************** - * Hardwired fastpaths for emitting whole vertices or groups of - * vertices - */ -#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \ -static void NAME( struct draw_vertex_fetch *vf, \ - unsigned count, \ - uint8_t *v ) \ -{ \ - struct draw_vf_attr *a = vf->attr; \ - unsigned i; \ - \ - for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \ - if (NR > 0) { \ - F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \ - a[0].inputptr += a[0].inputstride; \ - } \ - \ - if (NR > 1) { \ - F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \ - a[1].inputptr += a[1].inputstride; \ - } \ - \ - if (NR > 2) { \ - F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \ - a[2].inputptr += a[2].inputstride; \ - } \ - \ - if (NR > 3) { \ - F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \ - a[3].inputptr += a[3].inputstride; \ - } \ - \ - if (NR > 4) { \ - F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \ - a[4].inputptr += a[4].inputstride; \ - } \ - } \ -} - - -#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \ - insert_null, insert_null, NAME) - -#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \ - insert_null, NAME) - -#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \ - insert_null, NAME) - - -EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4) - -EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2) - -EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2) - - -/* Use the codegen paths to select one of a number of hardwired - * fastpaths. - */ -void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ) -{ - draw_vf_emit_func func = NULL; - - /* Does it fit a hardwired fastpath? Help! this is growing out of - * control! - */ - switch (vf->attr_count) { - case 2: - if (vf->attr[0].do_insert == insert_3f_3 && - vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { - func = emit_xyz3_rgba4; - } - break; - case 3: - if (vf->attr[2].do_insert == insert_2f_2) { - if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { - if (vf->attr[0].do_insert == insert_4f_4) - func = emit_xyzw4_rgba4_st2; - } - } - break; - case 4: - if (vf->attr[2].do_insert == insert_2f_2 && - vf->attr[3].do_insert == insert_2f_2) { - if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { - if (vf->attr[0].do_insert == insert_4f_4) - func = emit_xyzw4_rgba4_st2_st2; - } - } - break; - } - - vf->emit = func; -} - -/*********************************************************************** - * Generic (non-codegen) functions for whole vertices or groups of - * vertices - */ - -void draw_vf_generic_emit( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *v ) -{ - struct draw_vf_attr *a = vf->attr; - const unsigned attr_count = vf->attr_count; - const unsigned stride = vf->vertex_stride; - unsigned i, j; - - for (i = 0 ; i < count ; i++, v += stride) { - for (j = 0; j < attr_count; j++) { - float *in = (float *)a[j].inputptr; - a[j].inputptr += a[j].inputstride; - a[j].do_insert( &a[j], v + a[j].vertoffset, in ); - } - } -} - - diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c deleted file mode 100644 index aff4ffd985..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Copyright 2003 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell <keithw@tungstengraphics.com> - */ - - -#include "pipe/p_compiler.h" -#include "util/u_simple_list.h" - -#include "draw_vf.h" - - -#if defined(USE_SSE_ASM) - -#include "rtasm/rtasm_cpu.h" -#include "rtasm/rtasm_x86sse.h" - - -#define X 0 -#define Y 1 -#define Z 2 -#define W 3 - - -struct x86_program { - struct x86_function func; - - struct draw_vertex_fetch *vf; - boolean inputs_safe; - boolean outputs_safe; - boolean have_sse2; - - struct x86_reg identity; - struct x86_reg chan0; -}; - - -static struct x86_reg get_identity( struct x86_program *p ) -{ - return p->identity; -} - -static void emit_load4f_4( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movups(&p->func, dest, arg0); -} - -static void emit_load4f_3( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Have to jump through some hoops: - * - * c 0 0 0 - * c 0 0 1 - * 0 0 c 1 - * a b c 1 - */ - sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); - sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); - sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) ); - sse_movlps(&p->func, dest, arg0); -} - -static void emit_load4f_2( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Initialize from identity, then pull in low two words: - */ - sse_movups(&p->func, dest, get_identity(p)); - sse_movlps(&p->func, dest, arg0); -} - -static void emit_load4f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Pull in low word, then swizzle in identity */ - sse_movss(&p->func, dest, arg0); - sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); -} - - - -static void emit_load3f_3( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Over-reads by 1 dword - potential SEGV if input is a vertex - * array. - */ - if (p->inputs_safe) { - sse_movups(&p->func, dest, arg0); - } - else { - /* c 0 0 0 - * c c c c - * a b c c - */ - sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); - sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X)); - sse_movlps(&p->func, dest, arg0); - } -} - -static void emit_load3f_2( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - emit_load4f_2(p, dest, arg0); -} - -static void emit_load3f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - emit_load4f_1(p, dest, arg0); -} - -static void emit_load2f_2( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movlps(&p->func, dest, arg0); -} - -static void emit_load2f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - emit_load4f_1(p, dest, arg0); -} - -static void emit_load1f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movss(&p->func, dest, arg0); -} - -static void (*load[4][4])( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) = { - { emit_load1f_1, - emit_load1f_1, - emit_load1f_1, - emit_load1f_1 }, - - { emit_load2f_1, - emit_load2f_2, - emit_load2f_2, - emit_load2f_2 }, - - { emit_load3f_1, - emit_load3f_2, - emit_load3f_3, - emit_load3f_3 }, - - { emit_load4f_1, - emit_load4f_2, - emit_load4f_3, - emit_load4f_4 } -}; - -static void emit_load( struct x86_program *p, - struct x86_reg dest, - unsigned sz, - struct x86_reg src, - unsigned src_sz) -{ - load[sz-1][src_sz-1](p, dest, src); -} - -static void emit_store4f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movups(&p->func, dest, arg0); -} - -static void emit_store3f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - if (p->outputs_safe) { - /* Emit the extra dword anyway. This may hurt writecombining, - * may cause other problems. - */ - sse_movups(&p->func, dest, arg0); - } - else { - /* Alternate strategy - emit two, shuffle, emit one. - */ - sse_movlps(&p->func, dest, arg0); - sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ - sse_movss(&p->func, x86_make_disp(dest,8), arg0); - } -} - -static void emit_store2f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movlps(&p->func, dest, arg0); -} - -static void emit_store1f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movss(&p->func, dest, arg0); -} - - -static void (*store[4])( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) = -{ - emit_store1f, - emit_store2f, - emit_store3f, - emit_store4f -}; - -static void emit_store( struct x86_program *p, - struct x86_reg dest, - unsigned sz, - struct x86_reg temp ) - -{ - store[sz-1](p, dest, temp); -} - -static void emit_pack_store_4ub( struct x86_program *p, - struct x86_reg dest, - struct x86_reg temp ) -{ - /* Scale by 255.0 - */ - sse_mulps(&p->func, temp, p->chan0); - - if (p->have_sse2) { - sse2_cvtps2dq(&p->func, temp, temp); - sse2_packssdw(&p->func, temp, temp); - sse2_packuswb(&p->func, temp, temp); - sse_movss(&p->func, dest, temp); - } - else { - struct x86_reg mmx0 = x86_make_reg(file_MMX, 0); - struct x86_reg mmx1 = x86_make_reg(file_MMX, 1); - sse_cvtps2pi(&p->func, mmx0, temp); - sse_movhlps(&p->func, temp, temp); - sse_cvtps2pi(&p->func, mmx1, temp); - mmx_packssdw(&p->func, mmx0, mmx1); - mmx_packuswb(&p->func, mmx0, mmx0); - mmx_movd(&p->func, dest, mmx0); - } -} - -static int get_offset( const void *a, const void *b ) -{ - return (const char *)b - (const char *)a; -} - -/* Not much happens here. Eventually use this function to try and - * avoid saving/reloading the source pointers each vertex (if some of - * them can fit in registers). - */ -static void get_src_ptr( struct x86_program *p, - struct x86_reg srcREG, - struct x86_reg vfREG, - struct draw_vf_attr *a ) -{ - struct draw_vertex_fetch *vf = p->vf; - struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); - - /* Load current a[j].inputptr - */ - x86_mov(&p->func, srcREG, ptr_to_src); -} - -static void update_src_ptr( struct x86_program *p, - struct x86_reg srcREG, - struct x86_reg vfREG, - struct draw_vf_attr *a ) -{ - if (a->inputstride) { - struct draw_vertex_fetch *vf = p->vf; - struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); - - /* add a[j].inputstride (hardcoded value - could just as easily - * pull the stride value from memory each time). - */ - x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride)); - - /* save new value of a[j].inputptr - */ - x86_mov(&p->func, ptr_to_src, srcREG); - } -} - - -/* Lots of hardcoding - * - * EAX -- pointer to current output vertex - * ECX -- pointer to current attribute - * - */ -static boolean build_vertex_emit( struct x86_program *p ) -{ - struct draw_vertex_fetch *vf = p->vf; - unsigned j = 0; - - struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX); - struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX); - struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); - struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI); - struct x86_reg temp = x86_make_reg(file_XMM, 0); - uint8_t *fixup, *label; - - /* Push a few regs? - */ - x86_push(&p->func, countEBP); - x86_push(&p->func, vfESI); - - - /* Get vertex count, compare to zero - */ - x86_xor(&p->func, srcECX, srcECX); - x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2)); - x86_cmp(&p->func, countEBP, srcECX); - fixup = x86_jcc_forward(&p->func, cc_E); - - /* Initialize destination register. - */ - x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3)); - - /* Move argument 1 (vf) into a reg: - */ - x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1)); - - - /* always load, needed or not: - */ - sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0]))); - - /* Note address for loop jump */ - label = x86_get_label(&p->func); - - /* Emit code for each of the attributes. Currently routes - * everything through SSE registers, even when it might be more - * efficient to stick with regular old x86. No optimization or - * other tricks - enough new ground to cover here just getting - * things working. - */ - while (j < vf->attr_count) { - struct draw_vf_attr *a = &vf->attr[j]; - struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset); - - /* Now, load an XMM reg from src, perhaps transform, then save. - * Could be shortcircuited in specific cases: - */ - switch (a->format) { - case DRAW_EMIT_1F: - case DRAW_EMIT_1F_CONST: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 1, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_2F: - case DRAW_EMIT_2F_CONST: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 2, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_3F: - case DRAW_EMIT_3F_CONST: - /* Potentially the worst case - hardcode 2+1 copying: - */ - if (0) { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 3, temp); - update_src_ptr(p, srcECX, vfESI, a); - } - else { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 2, temp); - if (a->inputsize > 2) { - emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1); - emit_store(p, x86_make_disp(dest,8), 1, temp); - } - else { - sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p)); - } - update_src_ptr(p, srcECX, vfESI, a); - } - break; - case DRAW_EMIT_4F: - case DRAW_EMIT_4F_CONST: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 4, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_3F_XYW: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z)); - emit_store(p, dest, 3, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - - case DRAW_EMIT_1UB_1F: - /* Test for PAD3 + 1UB: - */ - if (j > 0 && - a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3) - { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X)); - emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */ - update_src_ptr(p, srcECX, vfESI, a); - } - else { - debug_printf("Can't emit 1ub %x %x %d\n", - a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize ); - return FALSE; - } - break; - case DRAW_EMIT_3UB_3F_RGB: - case DRAW_EMIT_3UB_3F_BGR: - /* Test for 3UB + PAD1: - */ - if (j == vf->attr_count - 1 || - a[1].vertoffset >= a->vertoffset + 4) { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); - if (a->format == DRAW_EMIT_3UB_3F_BGR) - sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - } - /* Test for 3UB + 1UB: - */ - else if (j < vf->attr_count - 1 && - a[1].format == DRAW_EMIT_1UB_1F && - a[1].vertoffset == a->vertoffset + 3) { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); - update_src_ptr(p, srcECX, vfESI, a); - - /* Make room for incoming value: - */ - sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); - - get_src_ptr(p, srcECX, vfESI, &a[1]); - emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); - update_src_ptr(p, srcECX, vfESI, &a[1]); - - /* Rearrange and possibly do BGR conversion: - */ - if (a->format == DRAW_EMIT_3UB_3F_BGR) - sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); - else - sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X)); - - emit_pack_store_4ub(p, dest, temp); - j++; /* NOTE: two attrs consumed */ - } - else { - debug_printf("Can't emit 3ub\n"); - } - return FALSE; /* add this later */ - break; - - case DRAW_EMIT_4UB_4F_RGBA: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_4UB_4F_BGRA: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_4UB_4F_ARGB: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_4UB_4F_ABGR: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - default: - debug_printf("unknown a[%d].format %d\n", j, a->format); - return FALSE; /* catch any new opcodes */ - } - - /* Increment j by at least 1 - may have been incremented above also: - */ - j++; - } - - /* Next vertex: - */ - x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride)); - - /* decr count, loop if not zero - */ - x86_dec(&p->func, countEBP); - x86_test(&p->func, countEBP, countEBP); - x86_jcc(&p->func, cc_NZ, label); - - /* Exit mmx state? - */ - if (p->func.need_emms) - mmx_emms(&p->func); - - /* Land forward jump here: - */ - x86_fixup_fwd_jump(&p->func, fixup); - - /* Pop regs and return - */ - x86_pop(&p->func, x86_get_base_reg(vfESI)); - x86_pop(&p->func, countEBP); - x86_ret(&p->func); - - vf->emit = (draw_vf_emit_func)x86_get_func(&p->func); - return TRUE; -} - - - -void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) -{ - struct x86_program p; - - if (!rtasm_cpu_has_sse()) { - vf->codegen_emit = NULL; - return; - } - - memset(&p, 0, sizeof(p)); - - p.vf = vf; - p.inputs_safe = 0; /* for now */ - p.outputs_safe = 1; /* for now */ - p.have_sse2 = rtasm_cpu_has_sse2(); - p.identity = x86_make_reg(file_XMM, 6); - p.chan0 = x86_make_reg(file_XMM, 7); - - x86_init_func(&p.func); - - if (build_vertex_emit(&p)) { - draw_vf_register_fastpath( vf, TRUE ); - } - else { - /* Note the failure so that we don't keep trying to codegen an - * impossible state: - */ - draw_vf_register_fastpath( vf, FALSE ); - x86_release_func(&p.func); - } -} - -#else - -void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) -{ - /* Dummy version for when USE_SSE_ASM not defined */ -} - -#endif diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vs.c index 133418baca..03fe00a951 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -39,51 +39,6 @@ -/** - * Run the vertex shader on all vertices in the vertex queue. - * Called by the draw module when the vertx cache needs to be flushed. - */ -void -draw_vertex_shader_queue_flush(struct draw_context *draw) -{ - struct draw_vertex_shader *shader = draw->vertex_shader; - unsigned i; - - assert(draw->vs.queue_nr != 0); - - /* XXX: do this on statechange: - */ - shader->prepare( shader, draw ); - -// fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr ); - - /* run vertex shader on vertex cache entries, four per invokation */ - for (i = 0; i < draw->vs.queue_nr; i += 4) { - struct vertex_header *dests[4]; - unsigned elts[4]; - int j, n = MIN2(4, draw->vs.queue_nr - i); - - for (j = 0; j < n; j++) { - elts[j] = draw->vs.queue[i + j].elt; - dests[j] = draw->vs.queue[i + j].vertex; - } - - for ( ; j < 4; j++) { - elts[j] = elts[0]; - dests[j] = draw->vs.queue[i + j].vertex; - } - - assert(n > 0); - assert(n <= 4); - - shader->run(shader, draw, elts, n, dests); - } - - draw->vs.post_nr = draw->vs.queue_nr; - draw->vs.queue_nr = 0; -} - - struct draw_vertex_shader * draw_create_vertex_shader(struct draw_context *draw, const struct pipe_shader_state *shader) @@ -97,10 +52,8 @@ draw_create_vertex_shader(struct draw_context *draw, vs = draw_create_vs_exec( draw, shader ); } } - assert(vs); - - tgsi_scan_shader(shader->tokens, &vs->info); + assert(vs); return vs; } @@ -115,9 +68,6 @@ draw_bind_vertex_shader(struct draw_context *draw, { draw->vertex_shader = dvs; draw->num_vs_outputs = dvs->info.num_outputs; - - tgsi_exec_machine_init(&draw->machine); - dvs->prepare( dvs, draw ); } else { diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 4ee7e705e9..f9772b83b8 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -31,10 +31,43 @@ #ifndef DRAW_VS_H #define DRAW_VS_H -struct draw_vertex_shader; +#include "draw_context.h" +#include "draw_private.h" + + struct draw_context; struct pipe_shader_state; +/** + * Private version of the compiled vertex_shader + */ +struct draw_vertex_shader { + + /* This member will disappear shortly: + */ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + void (*prepare)( struct draw_vertex_shader *shader, + struct draw_context *draw ); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + void (*run_linear)( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ); + + + void (*delete)( struct draw_vertex_shader * ); +}; + + struct draw_vertex_shader * draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *templ); @@ -47,4 +80,8 @@ struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); + +#define MAX_TGSI_VERTICES 4 + + #endif diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index c6e503686a..7a02f6334b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -39,163 +39,120 @@ #include "draw_vs.h" #include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_scan.h" -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<<i); - } +struct exec_vertex_shader { + struct draw_vertex_shader base; + struct tgsi_exec_machine *machine; +}; - return mask; +static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs ) +{ + return (struct exec_vertex_shader *)vs; } +/* Not required for run_linear. + */ static void vs_exec_prepare( struct draw_vertex_shader *shader, struct draw_context *draw ) { + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + /* specify the vertex program to interpret/execute */ - tgsi_exec_machine_bind_shader(&draw->machine, + tgsi_exec_machine_bind_shader(evs->machine, shader->state.tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); - draw_update_vertex_fetch( draw ); } -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. */ static void -vs_exec_run( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - struct vertex_header *vOut[] ) +vs_exec_run_linear( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) { - struct tgsi_exec_machine *machine = &draw->machine; - unsigned int j; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(count <= 4); - assert(draw->vertex_shader->info.output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); - - machine->Consts = (float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - if (draw->rasterizer->bypass_vs) { - /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; - } - else { - machine->Outputs = ALIGN16_ASSIGN(outputs); - } + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + struct tgsi_exec_machine *machine = evs->machine; + unsigned int i, j; + unsigned slot; + + machine->Consts = constants; + + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + /* Swizzle inputs. + */ + for (j = 0; j < max_vertices; j++) { +#if 0 + debug_printf("%d) Input vert:\n", i + j); + for (slot = 0; slot < shader->info.num_inputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + input[slot][0], + input[slot][1], + input[slot][2], + input[slot][3]); + } +#endif + + for (slot = 0; slot < shader->info.num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + + input = (const float (*)[4])((const char *)input + input_stride); + } - if (!draw->rasterizer->bypass_vs) { /* run interpreter */ tgsi_exec_machine_run( machine ); - } - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. + /* Unswizzle all output results. */ - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - vOut[j]->clipmask = 0; - } - vOut[j]->edgeflag = 1; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; - } - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - -#if 0 /*DEBUG*/ - printf("Post xform vert:\n"); - for (slot = 0; slot < draw->num_vs_outputs; slot++) { - printf("%d: %f %f %f %f\n", slot, - vOut[j]->data[slot][0], - vOut[j]->data[slot][1], - vOut[j]->data[slot][2], - vOut[j]->data[slot][3]); - } -#endif - - - } /* loop over vertices */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < shader->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + + } + +#if 0 + debug_printf("%d) Post xform vert:\n", i + j); + for (slot = 0; slot < shader->info.num_outputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + output[slot][0], + output[slot][1], + output[slot][2], + output[slot][3]); + } +#endif + + output = (float (*)[4])((char *)output + output_stride); + } + + } } + static void vs_exec_delete( struct draw_vertex_shader *dvs ) { @@ -208,17 +165,24 @@ struct draw_vertex_shader * draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *state) { - struct draw_vertex_shader *vs = CALLOC_STRUCT( draw_vertex_shader ); - uint nt = tgsi_num_tokens(state->tokens); + struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader ); if (vs == NULL) return NULL; /* we make a private copy of the tokens */ - vs->state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0])); - vs->prepare = vs_exec_prepare; - vs->run = vs_exec_run; - vs->delete = vs_exec_delete; + vs->base.state.tokens = tgsi_dup_tokens(state->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } + + tgsi_scan_shader(state->tokens, &vs->base.info); + + vs->base.prepare = vs_exec_prepare; + vs->base.run_linear = vs_exec_run_linear; + vs->base.delete = vs_exec_delete; + vs->machine = &draw->machine; - return vs; + return &vs->base; } diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index c8268317ef..171da51dd5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /* @@ -47,154 +47,42 @@ struct draw_llvm_vertex_shader { struct draw_vertex_shader base; struct gallivm_prog *llvm_prog; + struct tgsi_exec_machine *machine; }; -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<<i); - } - - return mask; -} - - - static void vs_llvm_prepare( struct draw_vertex_shader *base, struct draw_context *draw ) { - draw_update_vertex_fetch( draw ); } -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices - */ + static void -vs_llvm_run( struct draw_vertex_shader *base, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - struct vertex_header *vOut[] ) +vs_llvm_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) { - struct draw_llvm_vertex_shader *shader = + struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; - struct tgsi_exec_machine *machine = &draw->machine; - unsigned int j; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - - assert(count <= 4); - assert(draw->vertex_shader->state->output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); - - /* Consts does not require 16 byte alignment. */ - machine->Consts = (float (*)[4]) draw->user.constants; - - machine->Inputs = ALIGN16_ASSIGN(inputs); - if (draw->rasterizer->bypass_vs) { - /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; - } - else { - machine->Outputs = ALIGN16_ASSIGN(outputs); - } - - - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + gallivm_cpu_vs_exec(shader->llvm_prog, shader->machine, + input, base->info.num_inputs, output, base->info.num_outputs, + constants, count, input_stride, output_stride); +} - if (!draw->rasterizer->bypass_vs) { - /* run shader */ - gallivm_cpu_vs_exec(shader->llvm_prog, - machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps); - } - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - vOut[j]->clipmask = 0; - } - vOut[j]->edgeflag = 1; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; - } - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - } /* loop over vertices */ -} static void vs_llvm_delete( struct draw_vertex_shader *base ) { - struct draw_llvm_vertex_shader *shader = + struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; /* Do something to free compiled shader: @@ -212,23 +100,30 @@ draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ) { struct draw_llvm_vertex_shader *vs; - uint nt = tgsi_num_tokens(templ->tokens); vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); - if (vs == NULL) + if (vs == NULL) return NULL; /* we make a private copy of the tokens */ - vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } + + tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); + vs->base.prepare = vs_llvm_prepare; - vs->base.run = vs_llvm_run; + vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; + vs->machine = &draw->machine; { struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); gallivm_ir_set_layout(ir, GALLIVM_SOA); gallivm_ir_set_components(ir, 4); - gallivm_ir_fill_from_tgsi(ir, vs->base.state->tokens); + gallivm_ir_fill_from_tgsi(ir, vs->base.state.tokens); vs->llvm_prog = gallivm_ir_compile(ir); gallivm_ir_delete(ir); } diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index f40d65df08..e3f4e67472 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -41,173 +41,136 @@ #include "draw_private.h" #include "draw_context.h" +#include "rtasm/rtasm_cpu.h" #include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_sse2.h" #include "tgsi/util/tgsi_parse.h" +#define SSE_MAX_VERTICES 4 +#define SSE_SWIZZLES 1 +#if SSE_SWIZZLES +typedef void (XSTDCALL *codegen_function) ( + const struct tgsi_exec_vector *input, /* 1 */ + struct tgsi_exec_vector *output, /* 2 */ + float (*constant)[4], /* 3 */ + struct tgsi_exec_vector *temporary, /* 4 */ + float (*immediates)[4], /* 5 */ + const float (*aos_input)[4], /* 6 */ + uint num_inputs, /* 7 */ + uint input_stride, /* 8 */ + float (*aos_output)[4], /* 9 */ + uint num_outputs, /* 10 */ + uint output_stride ); /* 11 */ +#else typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, float (*constant)[4], - struct tgsi_exec_vector *temporary ); - + struct tgsi_exec_vector *temporary, + float (*immediates)[4] ); +#endif struct draw_sse_vertex_shader { struct draw_vertex_shader base; struct x86_function sse2_program; - codegen_function func; -}; + codegen_function func; + + struct tgsi_exec_machine *machine; -/* Should be part of the generated shader: - */ -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<<i); - } - - return mask; -} + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; +}; static void vs_sse_prepare( struct draw_vertex_shader *base, struct draw_context *draw ) { - draw_update_vertex_fetch( draw ); } -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. */ static void -vs_sse_run( struct draw_vertex_shader *base, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - struct vertex_header *vOut[] ) +vs_sse_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; - struct tgsi_exec_machine *machine = &draw->machine; - unsigned int j; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(count <= 4); - assert(draw->vertex_shader->info.output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); - - /* Consts does not require 16 byte alignment. */ - machine->Consts = (float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - if (draw->rasterizer->bypass_vs) { - /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; - } - else { - machine->Outputs = ALIGN16_ASSIGN(outputs); - } - - - /* Fetch vertices. This may at some point be integrated into the - * compiled shader -- that would require a reorganization where - * multiple versions of the compiled shader might exist, - * specialized for each fetch state. - */ - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); - if (!draw->rasterizer->bypass_vs) { +#if SSE_SWIZZLES /* run compiled shader - */ + */ shader->func(machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps ); - } - - - /* XXX: Computing the clipmask and emitting results should be done - * in the vertex program as a set of instructions appended to - * the user-provided code. - */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + shader->immediates, + input, + base->info.num_inputs, + input_stride, + output, + base->info.num_outputs, + output_stride ); + + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); +#else + unsigned int j, slot; - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - vOut[j]->clipmask = 0; - } - vOut[j]->edgeflag = 1; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; + /* Swizzle inputs. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < base->info.num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + + input = (const float (*)[4])((const char *)input + input_stride); } - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. + /* run compiled shader */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + shader->immediates); + + /* Unswizzle all output results. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < base->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + + output = (float (*)[4])((char *)output + output_stride); } - } +#endif + } } + static void vs_sse_delete( struct draw_vertex_shader *base ) { @@ -225,9 +188,8 @@ draw_create_vs_sse(struct draw_context *draw, const struct pipe_shader_state *templ) { struct draw_sse_vertex_shader *vs; - uint nt = tgsi_num_tokens(templ->tokens); - if (!draw->use_sse) + if (!rtasm_cpu_has_sse2()) return NULL; vs = CALLOC_STRUCT( draw_sse_vertex_shader ); @@ -235,18 +197,27 @@ draw_create_vs_sse(struct draw_context *draw, return NULL; /* we make a private copy of the tokens */ - vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; + + tgsi_scan_shader(templ->tokens, &vs->base.info); + vs->base.prepare = vs_sse_prepare; - vs->base.run = vs_sse_run; + vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; + vs->machine = &draw->machine; x86_init_func( &vs->sse2_program ); if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, - &vs->sse2_program )) + &vs->sse2_program, vs->immediates, SSE_SWIZZLES )) goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); + if (!vs->func) { + goto fail; + } return &vs->base; diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c deleted file mode 100644 index d6bff110b4..0000000000 --- a/src/gallium/auxiliary/draw/draw_wide_prims.c +++ /dev/null @@ -1,366 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" - - -struct wide_stage { - struct draw_stage stage; - - float half_line_width; - float half_point_size; - - uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; - uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; - uint num_texcoords; - - int psize_slot; -}; - - - -static INLINE struct wide_stage *wide_stage( struct draw_stage *stage ) -{ - return (struct wide_stage *)stage; -} - - -static void passthrough_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - -static void passthrough_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line(stage->next, header); -} - -static void passthrough_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} - - -/** - * Draw a wide line by drawing a quad (two triangles). - * XXX need to disable polygon stipple. - */ -static void wide_line( struct draw_stage *stage, - struct prim_header *header ) -{ - const struct wide_stage *wide = wide_stage(stage); - const float half_width = wide->half_line_width; - - struct prim_header tri; - - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - const float dx = FABSF(pos0[0] - pos2[0]); - const float dy = FABSF(pos0[1] - pos2[1]); - - /* - * Draw wide line as a quad (two tris) by "stretching" the line along - * X or Y. - * We need to tweak coords in several ways to be conformant here. - */ - - if (dx > dy) { - /* x-major line */ - pos0[1] = pos0[1] - half_width - 0.25f; - pos1[1] = pos1[1] + half_width - 0.25f; - pos2[1] = pos2[1] - half_width - 0.25f; - pos3[1] = pos3[1] + half_width - 0.25f; - if (pos0[0] < pos2[0]) { - /* left to right line */ - pos0[0] -= 0.5f; - pos1[0] -= 0.5f; - pos2[0] -= 0.5f; - pos3[0] -= 0.5f; - } - else { - /* right to left line */ - pos0[0] += 0.5f; - pos1[0] += 0.5f; - pos2[0] += 0.5f; - pos3[0] += 0.5f; - } - } - else { - /* y-major line */ - pos0[0] = pos0[0] - half_width + 0.25f; - pos1[0] = pos1[0] + half_width + 0.25f; - pos2[0] = pos2[0] - half_width + 0.25f; - pos3[0] = pos3[0] + half_width + 0.25f; - if (pos0[1] < pos2[1]) { - /* top to bottom line */ - pos0[1] -= 0.5f; - pos1[1] -= 0.5f; - pos2[1] -= 0.5f; - pos3[1] -= 0.5f; - } - else { - /* bottom to top line */ - pos0[1] += 0.5f; - pos1[1] += 0.5f; - pos2[1] += 0.5f; - pos3[1] += 0.5f; - } - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -/** - * Set the vertex texcoords for sprite mode. - * Coords may be left untouched or set to a right-side-up or upside-down - * orientation. - */ -static void set_texcoords(const struct wide_stage *wide, - struct vertex_header *v, const float tc[4]) -{ - uint i; - for (i = 0; i < wide->num_texcoords; i++) { - if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { - uint j = wide->texcoord_slot[i]; - v->data[j][0] = tc[0]; - if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) - v->data[j][1] = 1.0f - tc[1]; - else - v->data[j][1] = tc[1]; - v->data[j][2] = tc[2]; - v->data[j][3] = tc[3]; - } - } -} - - -/* If there are lots of sprite points (and why wouldn't there be?) it - * would probably be more sensible to change hardware setup to - * optimize this rather than doing the whole thing in software like - * this. - */ -static void wide_point( struct draw_stage *stage, - struct prim_header *header ) -{ - const struct wide_stage *wide = wide_stage(stage); - const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; - float half_size; - float left_adj, right_adj; - - struct prim_header tri; - - /* four dups of original vertex */ - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - /* point size is either per-vertex or fixed size */ - if (wide->psize_slot >= 0) { - half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; - } - else { - half_size = wide->half_point_size; - } - - left_adj = -half_size; /* + 0.25f;*/ - right_adj = half_size; /* + 0.25f;*/ - - pos0[0] += left_adj; - pos0[1] -= half_size; - - pos1[0] += left_adj; - pos1[1] += half_size; - - pos2[0] += right_adj; - pos2[1] -= half_size; - - pos3[0] += right_adj; - pos3[1] += half_size; - - if (sprite) { - static const float tex00[4] = { 0, 0, 0, 1 }; - static const float tex01[4] = { 0, 1, 0, 1 }; - static const float tex11[4] = { 1, 1, 0, 1 }; - static const float tex10[4] = { 1, 0, 0, 1 }; - set_texcoords( wide, v0, tex00 ); - set_texcoords( wide, v1, tex01 ); - set_texcoords( wide, v2, tex10 ); - set_texcoords( wide, v3, tex11 ); - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -static void wide_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - struct wide_stage *wide = wide_stage(stage); - struct draw_context *draw = stage->draw; - - wide->half_point_size = 0.5f * draw->rasterizer->point_size; - - /* XXX we won't know the real size if it's computed by the vertex shader! */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) { - stage->point = wide_point; - } - else { - stage->point = passthrough_point; - } - - if (draw->rasterizer->point_sprite) { - /* find vertex shader texcoord outputs */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i, j = 0; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { - wide->texcoord_slot[j] = i; - wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; - j++; - } - } - wide->num_texcoords = j; - } - - wide->psize_slot = -1; - - if (draw->rasterizer->point_size_per_vertex) { - /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { - wide->psize_slot = i; - break; - } - } - } - - stage->point( stage, header ); -} - - - -static void wide_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - struct wide_stage *wide = wide_stage(stage); - struct draw_context *draw = stage->draw; - - wide->half_line_width = 0.5f * draw->rasterizer->line_width; - - if (draw->rasterizer->line_width != 1.0) { - wide->stage.line = wide_line; - } - else { - wide->stage.line = passthrough_line; - } - - stage->line( stage, header ); -} - - -static void wide_flush( struct draw_stage *stage, unsigned flags ) -{ - stage->line = wide_first_line; - stage->point = wide_first_point; - stage->next->flush( stage->next, flags ); -} - - -static void wide_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void wide_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -struct draw_stage *draw_wide_stage( struct draw_context *draw ) -{ - struct wide_stage *wide = CALLOC_STRUCT(wide_stage); - - draw_alloc_temp_verts( &wide->stage, 4 ); - - wide->stage.draw = draw; - wide->stage.next = NULL; - wide->stage.point = wide_first_point; - wide->stage.line = wide_first_line; - wide->stage.tri = passthrough_tri; - wide->stage.flush = wide_flush; - wide->stage.reset_stipple_counter = wide_reset_stipple_counter; - wide->stage.destroy = wide_destroy; - - return &wide->stage; -} diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index c24e19e062..c3f7bfba93 100644 --- a/src/gallium/auxiliary/gallivm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -65,10 +65,14 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + rm temp1.bin gallivmsoabuiltins.cpp: soabuiltins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-module -o=$@ -f -for=shader -funcname=createSoaBuiltins + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + rm temp2.bin # Emacs tags tags: diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index b6f641a3f8..77900e342b 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -288,10 +288,7 @@ void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, std::cout << "Creating llvm from: " <<std::endl; tgsi_dump(tokens, 0); - llvm::Module *mod = tgsi_to_llvmir(ir, tokens); - - //llvm::Module *mod = tgsi_to_llvm(ir, tokens); ir->module = mod; gallivm_ir_dump(ir, 0); } @@ -306,11 +303,11 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) { struct gallivm_prog *prog = (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); - + std::cout << "Before optimizations:"<<std::endl; ir->module->dump(); std::cout<<"-------------------------------"<<std::endl; - + PassManager veri; veri.add(createVerifierPass()); veri.run(*ir->module); @@ -318,7 +315,7 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) prog->num_consts = ir->num_consts; memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); prog->num_interp = ir->num_interp; - + /* Run optimization passes over it */ PassManager passes; passes.add(new TargetData(mod)); diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h index b4d6555d2f..36a64a7747 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.h +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -90,10 +90,15 @@ void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); struct gallivm_cpu_engine *gallivm_global_cpu_engine(); int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_vector *inputs, - struct tgsi_exec_vector *dests, - float (*consts)[4], - struct tgsi_exec_vector *temps); + struct tgsi_exec_machine *machine, + const float (*input)[4], + unsigned num_inputs, + float (*output)[4], + unsigned num_outputs, + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride); int gallivm_cpu_fs_exec(struct gallivm_prog *prog, float x, float y, float (*dests)[PIPE_MAX_SHADER_INPUTS][4], diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index 1796f0a177..0fc5c4ec5c 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -1,567 +1,140 @@ -// Generated by llvm2cpp - DO NOT MODIFY! - - -Module* createGallivmBuiltins(Module *mod) { - -mod->setModuleIdentifier("shader"); - -// Type Definitions -ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(8), 25); - -PointerType* PointerTy_1 = PointerType::get(ArrayTy_0, 0); - -std::vector<const Type*>FuncTy_2_args; -FuncTy_2_args.push_back(Type::FloatTy); -FuncTy_2_args.push_back(Type::FloatTy); -FunctionType* FuncTy_2 = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/FuncTy_2_args, - /*isVarArg=*/false); - -PointerType* PointerTy_3 = PointerType::get(FuncTy_2, 0); - -VectorType* VectorTy_4 = VectorType::get(Type::FloatTy, 4); - -std::vector<const Type*>FuncTy_5_args; -FuncTy_5_args.push_back(VectorTy_4); -FunctionType* FuncTy_5 = FunctionType::get( - /*Result=*/VectorTy_4, - /*Params=*/FuncTy_5_args, - /*isVarArg=*/false); - -std::vector<const Type*>FuncTy_6_args; -FuncTy_6_args.push_back(VectorTy_4); -FuncTy_6_args.push_back(VectorTy_4); -FuncTy_6_args.push_back(VectorTy_4); -FunctionType* FuncTy_6 = FunctionType::get( - /*Result=*/VectorTy_4, - /*Params=*/FuncTy_6_args, - /*isVarArg=*/false); - -VectorType* VectorTy_7 = VectorType::get(IntegerType::get(32), 4); - -std::vector<const Type*>FuncTy_9_args; -FunctionType* FuncTy_9 = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/FuncTy_9_args, - /*isVarArg=*/true); - -PointerType* PointerTy_8 = PointerType::get(FuncTy_9, 0); - -PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8), 0); - -std::vector<const Type*>FuncTy_12_args; -FuncTy_12_args.push_back(Type::FloatTy); -FunctionType* FuncTy_12 = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/FuncTy_12_args, - /*isVarArg=*/false); - -PointerType* PointerTy_11 = PointerType::get(FuncTy_12, 0); - -std::vector<const Type*>FuncTy_13_args; -FuncTy_13_args.push_back(VectorTy_4); -FunctionType* FuncTy_13 = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/FuncTy_13_args, - /*isVarArg=*/false); - - -// Function Declarations - -Function* func_approx = new Function( - /*Type=*/FuncTy_2, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"approx", mod); -func_approx->setCallingConv(CallingConv::C); -const ParamAttrsList *func_approx_PAL = 0; -func_approx->setParamAttrs(func_approx_PAL); - -Function* func_powf = new Function( - /*Type=*/FuncTy_2, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"powf", mod); // (external, no body) -func_powf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_powf_PAL = 0; -func_powf->setParamAttrs(func_powf_PAL); - -Function* func_lit = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"lit", mod); -func_lit->setCallingConv(CallingConv::C); -const ParamAttrsList *func_lit_PAL = 0; -func_lit->setParamAttrs(func_lit_PAL); - -Function* func_cmp = new Function( - /*Type=*/FuncTy_6, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"cmp", mod); -func_cmp->setCallingConv(CallingConv::C); -const ParamAttrsList *func_cmp_PAL = 0; -{ - ParamAttrsVector Attrs; - ParamAttrsWithIndex PAWI; - PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; - Attrs.push_back(PAWI); - func_cmp_PAL = ParamAttrsList::get(Attrs); - -} -func_cmp->setParamAttrs(func_cmp_PAL); - -Function* func_vcos = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"vcos", mod); -func_vcos->setCallingConv(CallingConv::C); -const ParamAttrsList *func_vcos_PAL = 0; -func_vcos->setParamAttrs(func_vcos_PAL); - -Function* func_printf = new Function( - /*Type=*/FuncTy_9, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"printf", mod); // (external, no body) -func_printf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_printf_PAL = 0; -func_printf->setParamAttrs(func_printf_PAL); - -Function* func_cosf = new Function( - /*Type=*/FuncTy_12, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"cosf", mod); // (external, no body) -func_cosf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_cosf_PAL = 0; -func_cosf->setParamAttrs(func_cosf_PAL); - -Function* func_scs = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"scs", mod); -func_scs->setCallingConv(CallingConv::C); -const ParamAttrsList *func_scs_PAL = 0; -func_scs->setParamAttrs(func_scs_PAL); - -Function* func_sinf = new Function( - /*Type=*/FuncTy_12, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"sinf", mod); // (external, no body) -func_sinf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_sinf_PAL = 0; -func_sinf->setParamAttrs(func_sinf_PAL); - -Function* func_vsin = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"vsin", mod); -func_vsin->setCallingConv(CallingConv::C); -const ParamAttrsList *func_vsin_PAL = 0; -func_vsin->setParamAttrs(func_vsin_PAL); - -Function* func_kilp = new Function( - /*Type=*/FuncTy_13, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"kilp", mod); -func_kilp->setCallingConv(CallingConv::C); -const ParamAttrsList *func_kilp_PAL = 0; -{ - ParamAttrsVector Attrs; - ParamAttrsWithIndex PAWI; - PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; - Attrs.push_back(PAWI); - func_kilp_PAL = ParamAttrsList::get(Attrs); - -} -func_kilp->setParamAttrs(func_kilp_PAL); - -// Global Variable Declarations - - -GlobalVariable* gvar_array__str = new GlobalVariable( -/*Type=*/ArrayTy_0, -/*isConstant=*/true, -/*Linkage=*/GlobalValue::InternalLinkage, -/*Initializer=*/0, // has initializer, specified below -/*Name=*/".str", -mod); - -GlobalVariable* gvar_array__str1 = new GlobalVariable( -/*Type=*/ArrayTy_0, -/*isConstant=*/true, -/*Linkage=*/GlobalValue::InternalLinkage, -/*Initializer=*/0, // has initializer, specified below -/*Name=*/".str1", -mod); - -// Constant Definitions -Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true); -Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true); -ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f)); -ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f)); -Constant* const_float_18 = Constant::getNullValue(Type::FloatTy); -Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32)); -std::vector<Constant*> const_packed_20_elems; -ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); -const_packed_20_elems.push_back(const_float_21); -UndefValue* const_float_22 = UndefValue::get(Type::FloatTy); -const_packed_20_elems.push_back(const_float_22); -const_packed_20_elems.push_back(const_float_22); -const_packed_20_elems.push_back(const_float_21); -Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems); -ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10)); -ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10)); -ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10)); -std::vector<Constant*> const_packed_26_elems; -const_packed_26_elems.push_back(const_float_21); -const_packed_26_elems.push_back(const_float_18); -const_packed_26_elems.push_back(const_float_18); -const_packed_26_elems.push_back(const_float_21); -Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems); -Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy); -std::vector<Constant*> const_packed_28_elems; -const_packed_28_elems.push_back(const_int32_19); -ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10)); -const_packed_28_elems.push_back(const_int32_29); -const_packed_28_elems.push_back(const_int32_25); -const_packed_28_elems.push_back(const_int32_24); -Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems); -std::vector<Constant*> const_packed_30_elems; -const_packed_30_elems.push_back(const_int32_19); -const_packed_30_elems.push_back(const_int32_23); -ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10)); -const_packed_30_elems.push_back(const_int32_31); -const_packed_30_elems.push_back(const_int32_24); -Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems); -std::vector<Constant*> const_packed_32_elems; -const_packed_32_elems.push_back(const_int32_19); -const_packed_32_elems.push_back(const_int32_23); -const_packed_32_elems.push_back(const_int32_25); -ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10)); -const_packed_32_elems.push_back(const_int32_33); -Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems); -std::vector<Constant*> const_ptr_34_indices; -const_ptr_34_indices.push_back(const_int32_19); -const_ptr_34_indices.push_back(const_int32_19); -Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() ); -UndefValue* const_packed_35 = UndefValue::get(VectorTy_4); -std::vector<Constant*> const_ptr_36_indices; -const_ptr_36_indices.push_back(const_int32_19); -const_ptr_36_indices.push_back(const_int32_19); -Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() ); - -// Global Variable Definitions -gvar_array__str->setInitializer(const_array_14); -gvar_array__str1->setInitializer(const_array_15); - -// Function Definitions - -// Function: approx (func_approx) -{ - Function::arg_iterator args = func_approx->arg_begin(); - Value* float_a = args++; - float_a->setName("a"); - Value* float_b = args++; - float_b->setName("b"); - - BasicBlock* label_entry = new BasicBlock("entry",func_approx,0); - - // Block entry (label_entry) - FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry); - SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry); - FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry); - SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry); - FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry); - SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry); - std::vector<Value*> float_call_params; - float_call_params.push_back(float_a_addr_0); - float_call_params.push_back(float_b_addr_1); - CallInst* float_call = new CallInst(func_powf, float_call_params.begin(), float_call_params.end(), "call", label_entry); - float_call->setCallingConv(CallingConv::C); - float_call->setTailCall(true);const ParamAttrsList *float_call_PAL = 0; - float_call->setParamAttrs(float_call_PAL); - - new ReturnInst(float_call, label_entry); - -} - -// Function: lit (func_lit) -{ - Function::arg_iterator args = func_lit->arg_begin(); - Value* packed_tmp = args++; - packed_tmp->setName("tmp"); - - BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0); - BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0); - BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0); - - // Block entry (label_entry_38) - ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp6", label_entry_38); - FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp6, const_float_18, "cmp", label_entry_38); - new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38); - - // Block ifthen (label_ifthen) - InsertElementInst* packed_tmp10 = new InsertElementInst(const_packed_20, float_tmp6, const_int32_23, "tmp10", label_ifthen); - ExtractElementInst* float_tmp12 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp12", label_ifthen); - ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp14", label_ifthen); - std::vector<Value*> float_call_41_params; - float_call_41_params.push_back(float_tmp12); - float_call_41_params.push_back(float_tmp14); - CallInst* float_call_41 = new CallInst(func_approx, float_call_41_params.begin(), float_call_41_params.end(), "call", label_ifthen); - float_call_41->setCallingConv(CallingConv::C); - float_call_41->setTailCall(true);const ParamAttrsList *float_call_41_PAL = 0; - float_call_41->setParamAttrs(float_call_41_PAL); - - InsertElementInst* packed_tmp16 = new InsertElementInst(packed_tmp10, float_call_41, const_int32_25, "tmp16", label_ifthen); - new ReturnInst(packed_tmp16, label_ifthen); - - // Block UnifiedReturnBlock (label_UnifiedReturnBlock) - new ReturnInst(const_packed_26, label_UnifiedReturnBlock); - -} - -// Function: cmp (func_cmp) -{ - Function::arg_iterator args = func_cmp->arg_begin(); - Value* packed_tmp0 = args++; - packed_tmp0->setName("tmp0"); - Value* packed_tmp1 = args++; - packed_tmp1->setName("tmp1"); - Value* packed_tmp2 = args++; - packed_tmp2->setName("tmp2"); - - BasicBlock* label_entry_44 = new BasicBlock("entry",func_cmp,0); - BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0); - BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0); - BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0); - BasicBlock* label_cond_cont34 = new BasicBlock("cond.cont34",func_cmp,0); - BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0); - BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0); - - // Block entry (label_entry_44) - ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_44); - CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_44); - FCmpInst* int1_cmp_45 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_44); - ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_44); - CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_44); - FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_44); - SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_45, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_44); - new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_44); - - // Block cond.?14 (label_cond__14) - ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14); - ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14); - CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14); - FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14); - new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14); - - // Block cond.cont20 (label_cond_cont20) - ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20); - ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20); - CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20); - FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20); - new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20); - - // Block cond.?28 (label_cond__28) - PHINode* packed_tmp23_reg2mem_0 = new PHINode(VectorTy_4, "tmp23.reg2mem.0", label_cond__28); - packed_tmp23_reg2mem_0->reserveOperandSpace(2); - packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14); - packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20); - - ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28); - ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28); - CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28); - FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28); - new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28); - - // Block cond.cont34 (label_cond_cont34) - PHINode* packed_tmp23_reg2mem_1 = new PHINode(VectorTy_4, "tmp23.reg2mem.1", label_cond_cont34); - packed_tmp23_reg2mem_1->reserveOperandSpace(2); - packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14); - packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20); - - ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34); - ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34); - CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34); - FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34); - new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34); - - // Block cond.?42 (label_cond__42) - PHINode* packed_tmp37_reg2mem_0 = new PHINode(VectorTy_4, "tmp37.reg2mem.0", label_cond__42); - packed_tmp37_reg2mem_0->reserveOperandSpace(2); - packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28); - packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34); - - ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42); - new ReturnInst(packed_tmp5113, label_cond__42); - - // Block cond.cont48 (label_cond_cont48) - PHINode* packed_tmp37_reg2mem_1 = new PHINode(VectorTy_4, "tmp37.reg2mem.1", label_cond_cont48); - packed_tmp37_reg2mem_1->reserveOperandSpace(2); - packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28); - packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34); - - ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48); - new ReturnInst(packed_tmp51, label_cond_cont48); - -} - -// Function: vcos (func_vcos) -{ - Function::arg_iterator args = func_vcos->arg_begin(); - Value* packed_val = args++; - packed_val->setName("val"); - - BasicBlock* label_entry_53 = new BasicBlock("entry",func_vcos,0); - - // Block entry (label_entry_53) - ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_53); - CastInst* double_conv_54 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_53); - ExtractElementInst* float_tmp3_55 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_53); - CastInst* double_conv4 = new FPExtInst(float_tmp3_55, Type::DoubleTy, "conv4", label_entry_53); - ExtractElementInst* float_tmp6_56 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_53); - CastInst* double_conv7 = new FPExtInst(float_tmp6_56, Type::DoubleTy, "conv7", label_entry_53); - ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_53); - CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_53); - std::vector<Value*> int32_call_params; - int32_call_params.push_back(const_ptr_34); - int32_call_params.push_back(double_conv_54); - int32_call_params.push_back(double_conv4); - int32_call_params.push_back(double_conv7); - int32_call_params.push_back(double_conv10); - CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_53); - int32_call->setCallingConv(CallingConv::C); - int32_call->setTailCall(true);const ParamAttrsList *int32_call_PAL = 0; - int32_call->setParamAttrs(int32_call_PAL); - - CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_53); - float_call13->setCallingConv(CallingConv::C); - float_call13->setTailCall(true);const ParamAttrsList *float_call13_PAL = 0; - float_call13->setParamAttrs(float_call13_PAL); - - InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_53); - CallInst* float_call18 = new CallInst(func_cosf, float_tmp1, "call18", label_entry_53); - float_call18->setCallingConv(CallingConv::C); - float_call18->setTailCall(true);const ParamAttrsList *float_call18_PAL = 0; - float_call18->setParamAttrs(float_call18_PAL); - - InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call18, const_int32_23, "tmp20", label_entry_53); - CallInst* float_call23 = new CallInst(func_cosf, float_tmp1, "call23", label_entry_53); - float_call23->setCallingConv(CallingConv::C); - float_call23->setTailCall(true);const ParamAttrsList *float_call23_PAL = 0; - float_call23->setParamAttrs(float_call23_PAL); - - InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call23, const_int32_25, "tmp25", label_entry_53); - CallInst* float_call28 = new CallInst(func_cosf, float_tmp1, "call28", label_entry_53); - float_call28->setCallingConv(CallingConv::C); - float_call28->setTailCall(true);const ParamAttrsList *float_call28_PAL = 0; - float_call28->setParamAttrs(float_call28_PAL); - - InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call28, const_int32_24, "tmp30", label_entry_53); - CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_53); - CastInst* double_conv36 = new FPExtInst(float_call18, Type::DoubleTy, "conv36", label_entry_53); - CastInst* double_conv39 = new FPExtInst(float_call23, Type::DoubleTy, "conv39", label_entry_53); - CastInst* double_conv42 = new FPExtInst(float_call28, Type::DoubleTy, "conv42", label_entry_53); - std::vector<Value*> int32_call43_params; - int32_call43_params.push_back(const_ptr_36); - int32_call43_params.push_back(double_conv33); - int32_call43_params.push_back(double_conv36); - int32_call43_params.push_back(double_conv39); - int32_call43_params.push_back(double_conv42); - CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_53); - int32_call43->setCallingConv(CallingConv::C); - int32_call43->setTailCall(true);const ParamAttrsList *int32_call43_PAL = 0; - int32_call43->setParamAttrs(int32_call43_PAL); - - new ReturnInst(packed_tmp30, label_entry_53); - -} - -// Function: scs (func_scs) -{ - Function::arg_iterator args = func_scs->arg_begin(); - Value* packed_val_58 = args++; - packed_val_58->setName("val"); - - BasicBlock* label_entry_59 = new BasicBlock("entry",func_scs,0); - - // Block entry (label_entry_59) - ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_58, const_int32_19, "tmp2", label_entry_59); - CallInst* float_call_60 = new CallInst(func_cosf, float_tmp2, "call", label_entry_59); - float_call_60->setCallingConv(CallingConv::C); - float_call_60->setTailCall(true);const ParamAttrsList *float_call_60_PAL = 0; - float_call_60->setParamAttrs(float_call_60_PAL); - - InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_60, const_int32_19, "tmp5", label_entry_59); - CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_59); - float_call7->setCallingConv(CallingConv::C); - float_call7->setTailCall(true);const ParamAttrsList *float_call7_PAL = 0; - float_call7->setParamAttrs(float_call7_PAL); - - InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_59); - new ReturnInst(packed_tmp9, label_entry_59); - -} - -// Function: vsin (func_vsin) -{ - Function::arg_iterator args = func_vsin->arg_begin(); - Value* packed_val_62 = args++; - packed_val_62->setName("val"); - - BasicBlock* label_entry_63 = new BasicBlock("entry",func_vsin,0); - - // Block entry (label_entry_63) - ExtractElementInst* float_tmp2_64 = new ExtractElementInst(packed_val_62, const_int32_19, "tmp2", label_entry_63); - CallInst* float_call_65 = new CallInst(func_sinf, float_tmp2_64, "call", label_entry_63); - float_call_65->setCallingConv(CallingConv::C); - float_call_65->setTailCall(true);const ParamAttrsList *float_call_65_PAL = 0; - float_call_65->setParamAttrs(float_call_65_PAL); - - InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_65, const_int32_19, "tmp6", label_entry_63); - InsertElementInst* packed_tmp9_66 = new InsertElementInst(packed_tmp6, float_call_65, const_int32_23, "tmp9", label_entry_63); - InsertElementInst* packed_tmp12 = new InsertElementInst(packed_tmp9_66, float_call_65, const_int32_25, "tmp12", label_entry_63); - InsertElementInst* packed_tmp15_67 = new InsertElementInst(packed_tmp12, float_call_65, const_int32_24, "tmp15", label_entry_63); - new ReturnInst(packed_tmp15_67, label_entry_63); - -} - -// Function: kilp (func_kilp) -{ - Function::arg_iterator args = func_kilp->arg_begin(); - Value* packed_val_69 = args++; - packed_val_69->setName("val"); - - BasicBlock* label_entry_70 = new BasicBlock("entry",func_kilp,0); - BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0); - BasicBlock* label_lor_rhs5 = new BasicBlock("lor_rhs5",func_kilp,0); - BasicBlock* label_lor_rhs11 = new BasicBlock("lor_rhs11",func_kilp,0); - BasicBlock* label_UnifiedReturnBlock_71 = new BasicBlock("UnifiedReturnBlock",func_kilp,0); - - // Block entry (label_entry_70) - ExtractElementInst* float_tmp1_72 = new ExtractElementInst(packed_val_69, const_int32_19, "tmp1", label_entry_70); - FCmpInst* int1_cmp_73 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_72, const_float_18, "cmp", label_entry_70); - new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs, int1_cmp_73, label_entry_70); - - // Block lor_rhs (label_lor_rhs) - ExtractElementInst* float_tmp3_75 = new ExtractElementInst(packed_val_69, const_int32_23, "tmp3", label_lor_rhs); - FCmpInst* int1_cmp4 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_75, const_float_18, "cmp4", label_lor_rhs); - new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs5, int1_cmp4, label_lor_rhs); - - // Block lor_rhs5 (label_lor_rhs5) - ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_val_69, const_int32_25, "tmp7", label_lor_rhs5); - FCmpInst* int1_cmp8 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp7, const_float_18, "cmp8", label_lor_rhs5); - new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs11, int1_cmp8, label_lor_rhs5); - - // Block lor_rhs11 (label_lor_rhs11) - ExtractElementInst* float_tmp13 = new ExtractElementInst(packed_val_69, const_int32_24, "tmp13", label_lor_rhs11); - FCmpInst* int1_cmp14 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp13, const_float_18, "cmp14", label_lor_rhs11); - CastInst* int32_retval = new ZExtInst(int1_cmp14, IntegerType::get(32), "retval", label_lor_rhs11); - new ReturnInst(int32_retval, label_lor_rhs11); - - // Block UnifiedReturnBlock (label_UnifiedReturnBlock_71) - new ReturnInst(const_int32_23, label_UnifiedReturnBlock_71); - -} - -return mod; - -} +static const unsigned char llvm_builtins_data[] = { +0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x29,0x02,0x00,0x00,0x01,0x10,0x00,0x00, +0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39, +0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02, +0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24, +0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64, +0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x02,0x00,0x00,0x00,0x0b,0x04,0x00,0x0c, +0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x32,0x22,0x48,0x09, +0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45,0xc6,0x05,0x42,0x52, +0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83,0x29,0x80,0x21,0x00, +0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47,0x80,0x50,0x2b,0x03, +0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40,0x14,0x01,0x80,0x11, +0x80,0x22,0x88,0x00,0x13,0xa2,0x74,0xb0,0x03,0x3c,0xb0,0x83,0x36,0x80,0x87,0x71, +0x68,0x03,0x76,0x48,0x07,0x77,0xa8,0x07,0x7c,0x68,0x83,0x73,0x70,0x87,0x7a,0xd8, +0x70,0x0f,0xe5,0xd0,0x06,0xf0,0xa0,0x07,0x73,0x20,0x07,0x7a,0x30,0x07,0x72,0xa0, +0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x71,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06, +0xe9,0x80,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e,0x71,0x60,0x07,0x7a, +0x10,0x07,0x76,0xa0,0x07,0x71,0x60,0x07,0x6d,0x90,0x0e,0x73,0x20,0x07,0x7a,0x30, +0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07, +0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0e,0x73,0x20,0x07,0x7a,0x30,0x07,0x72, +0xa0,0x07,0x73,0x20,0x07,0x6d,0x60,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07,0x72,0xa0, +0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x76,0x40,0x07,0x7a,0x60,0x07,0x74,0xa0,0x07, +0x76,0x40,0x07,0x6d,0x60,0x0f,0x71,0x20,0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78, +0xa0,0x07,0x71,0x20,0x07,0x78,0xd0,0x06,0xe1,0x00,0x07,0x7a,0x00,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xe6,0x80,0x07,0x70,0xa0,0x07,0x71,0x20,0x07,0x78,0xa0,0x07, +0x71,0x20,0x07,0x78,0xa0,0xf3,0x40,0x88,0x04,0x32,0x32,0x02,0x04,0x20,0x76,0x46, +0xfc,0x6c,0x48,0x92,0x00,0x40,0x00,0x00,0x00,0x00,0x0c,0x49,0x12,0x20,0x00,0x00, +0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x01,0x00,0x00,0x00,0x30,0x24,0x59,0x00,0x20, +0x08,0x00,0x00,0x00,0x86,0x24,0x0a,0x00,0x04,0x00,0x00,0x00,0xc0,0x90,0x84,0x01, +0x02,0x00,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, +0x05,0x00,0x02,0x00,0x00,0x00,0x60,0x48,0x72,0x00,0x01,0x00,0x00,0x00,0x00,0x0c, +0x49,0x14,0x00,0x08,0x00,0x00,0x00,0x80,0x21,0x49,0x01,0x00,0x41,0x00,0x00,0x00, +0x90,0x05,0x02,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, +0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02, +0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66, +0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95, +0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00, +0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60, +0x04,0x00,0x00,0x00,0x93,0x0c,0xce,0x43,0x4c,0x31,0x3c,0x8e,0x34,0xc9,0x30,0x41, +0xc2,0x14,0x03,0x34,0x51,0x93,0x0c,0x4d,0x44,0x4c,0x31,0x44,0x8d,0x35,0x56,0x01, +0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0x46,0x41,0x08,0xcc, +0x73,0x9b,0x05,0x21,0x30,0xcf,0x6e,0x18,0x84,0x00,0x2c,0x8b,0x35,0x04,0x80,0x39, +0x04,0x81,0x5d,0x20,0x80,0x0f,0x0c,0x43,0xe4,0xd3,0x36,0x81,0x04,0x3e,0x30,0x0c, +0x91,0x4f,0x5b,0x05,0x12,0xf8,0xc0,0x30,0x44,0x7e,0x7d,0x00,0x05,0xd1,0x4c,0x11, +0x66,0x12,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x2a,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, +0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84, +0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00, +0x63,0x08,0x0d,0x34,0xc9,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, +0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19, +0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x13,0x00,0x00,0x00,0x17,0x60,0x20,0xc5, +0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d,0x14,0x13,0xf3,0xd4,0xb8,0x69, +0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4,0xba,0x35,0x0c,0x13,0xf3,0x9c, +0x80,0xe4,0x36,0x48,0x81,0x10,0xc3,0x4a,0x4c,0x54,0xd4,0x6c,0x8b,0x23,0x28,0x76, +0x41,0x4c,0xcc,0xa3,0x1b,0x07,0x21,0x00,0xcb,0x72,0x00,0x05,0xd1,0x4c,0x11,0x66, +0x18,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00,0x04,0x00,0x00,0x00, +0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91,0x11,0x00,0x00,0x00, +0x63,0x08,0x4d,0x64,0x16,0xc1,0x49,0x86,0xab,0x22,0x66,0x19,0x02,0x01,0x1b,0x43, +0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81,0x0a,0x20,0x0b,0x34, +0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x24,0x83,0x57,0x11,0xb3, +0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0,0x48,0xb3,0x04,0xc6, +0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48,0x63,0x08,0xcd,0x64, +0x64,0x40,0x70,0x92,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc,0x60,0x0c,0xc1,0x99, +0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80,0x33,0x38,0xd0,0x00, +0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80,0xe0,0x24,0x03,0x1b, +0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12,0x30,0x63,0x08,0x0f, +0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7,0xa0,0x06,0x70,0x00, +0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x76,0x52,0x4c,0xcc, +0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46,0xc6,0x50,0x8a,0x89, +0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89,0x79,0x68,0x73,0x20, +0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62,0x9e,0xdb,0x32,0x88, +0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98,0xa7,0xb7,0x95,0x62, +0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a,0x34,0x35,0x56,0x62, +0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25,0x2c,0x82,0xd3,0x0c, +0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7,0x24,0x01,0x63,0xec, +0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9,0xfc,0xc4,0xd0,0x90, +0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6,0xc1,0x71,0x7b,0x29, +0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01,0x32,0xf6,0xe6,0x46, +0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08,0x4e,0x33,0x58,0x47, +0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e,0x33,0xe1,0xbc,0xa5, +0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e,0x3a,0x40,0xc6,0xde, +0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee,0xed,0x82,0x10,0x9c, +0xa6,0xba,0x81,0x44,0x70,0x9a,0xc1,0x17,0x9c,0x66,0x32,0x93,0x42,0x60,0x1e,0x7b, +0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a,0xa7,0x6d,0xa4,0x98, +0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10,0x9c,0x66,0xc0,0x7b, +0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06,0x8b,0xe0,0x34,0x83, +0x2f,0x38,0xcd,0x64,0xd3,0xe6,0x61,0x08,0x4e,0x53,0xd5,0xf6,0x01,0x14,0x44,0x33, +0x45,0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x4a,0x00,0x00,0x00, +0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x24,0xca,0x60,0x04, +0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10,0x1e,0xe1,0x19,0xc6, +0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63,0x15,0xc1,0x31,0x84, +0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1,0x71,0x6c,0x23,0x38, +0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48,0x35,0xc7,0x20,0x79, +0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58,0x04,0x10,0x20,0xd5, +0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7,0x30,0x06,0x64,0xe0, +0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82,0x63,0x21,0x40,0x70, +0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00,0x25,0x00,0x00,0x00, +0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb,0x05,0x31,0x31,0xcf, +0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6,0x41,0x08,0xc0,0xb2, +0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5,0x9b,0x8a,0x21,0x00, +0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18,0x82,0xd3,0x54,0xb7, +0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f,0x9d,0x9b,0x87,0x21, +0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59,0x10,0x82,0xd3,0x54, +0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8,0xb4,0x8d,0x14,0x13, +0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86,0x00,0x2c,0x8b,0xcd, +0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72,0x00,0x05,0xd1,0x4c, +0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x19,0x00,0x00,0x00, +0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0x4a,0x60,0x04, +0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0x48, +0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3,0x44,0x39,0x58,0x85, +0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x16,0x41,0x4c,0xcc,0x63,0xdb,0x04,0x31, +0x31,0x4f,0x6e,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x56,0x41,0x4c, +0xcc,0xd3,0x1b,0x45,0x21,0x00,0xcb,0xb2,0x9b,0x04,0x21,0x00,0xcb,0x02,0x00,0x00, +0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00,0x13,0x04,0x41,0x2c, +0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80, +0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca,0x34,0xc7,0x20,0x51, +0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c,0x94,0x83,0x58,0x38, +0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x76,0x51, +0x4c,0xcc,0x53,0xdb,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31,0x31,0x8f,0x6e,0x0d, +0x43,0x05,0x2c,0x66,0x41,0x4c,0xcc,0xd3,0x1f,0x40,0x41,0x34,0x53,0x84,0x19,0x05, +0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x2f,0x00,0x00,0x00, +0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0xa0,0x04, +0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34,0xc9,0x30,0x49,0xc4, +0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xc9,0x50,0x49,0xc4,0x2c,0x03,0x21,0x58, +0x63,0x08,0x4d,0x34,0xc9,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60,0x63,0x08,0x8d,0x33, +0xc9,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00,0x1a,0x00,0x00,0x00, +0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb,0x04,0x31,0x31,0x4f, +0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x47,0x20,0xb9,0x0d,0x52, +0x20,0xc4,0xb0,0x12,0x13,0x15,0x35,0xdb,0xe2,0x08,0x8a,0x5d,0x10,0x13,0xf3,0xec, +0x37,0x90,0x2c,0x4e,0xf4,0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87, +0x74,0x02,0xc8,0xe2,0x44,0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d, +0x18,0x11,0x31,0x55,0xc0,0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61, +0x46,0x31,0x08,0xcc,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00, +0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82,0x23,0x59,0xc2,0x20,0x09,0x92,0x1d,0x18, +0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3,0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5, +0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c,0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73, +0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84,0x84,0x34,0x85,0x31,0x10,0x0a,0xb2,0x3c, +0x56,0x30,0x08,0xcc,0x63,0x0b,0x44,0x25,0x21,0x0d,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 8f9830d0b1..857c190f7b 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -41,6 +41,7 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_util.h" #include "tgsi/exec/tgsi_exec.h" #include "tgsi/util/tgsi_dump.h" @@ -179,22 +180,63 @@ typedef void (*vertex_shader_runner)(void *ainputs, float (*aconsts)[4], void *temps); - +#define MAX_TGSI_VERTICES 4 /*! This function is used to execute the gallivm_prog in software. Before calling this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile function. */ int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_vector *inputs, - struct tgsi_exec_vector *dests, - float (*consts)[4], - struct tgsi_exec_vector *temps) + struct tgsi_exec_machine *machine, + const float (*input)[4], + unsigned num_inputs, + float (*output)[4], + unsigned num_outputs, + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) { + unsigned int i, j; + unsigned slot; vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function); + assert(runner); - /*FIXME*/ - runner(inputs, dests, consts, temps); + + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + + /* Swizzle inputs. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + + input = (const float (*)[4])((const char *)input + input_stride); + } + + /* run shader */ + runner(machine->Inputs, + machine->Outputs, + (float (*)[4]) constants, + machine->Temps); + + /* Unswizzle all output results + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + output = (float (*)[4])((char *)output + output_stride); + } + } return 0; } diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 8919491792..1a98491b82 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -35,6 +35,8 @@ #include "storage.h" +#include "pipe/p_util.h" + #include <llvm/CallingConv.h> #include <llvm/Constants.h> #include <llvm/DerivedTypes.h> @@ -42,7 +44,8 @@ #include <llvm/InstrTypes.h> #include <llvm/Instructions.h> #include <llvm/ParameterAttributes.h> -#include <llvm/ParamAttrsList.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/Bitcode/ReaderWriter.h> #include <sstream> #include <fstream> @@ -53,7 +56,6 @@ using namespace llvm; #include "gallivm_builtins.cpp" #if 0 - llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals) { VectorType *vectorType = VectorType::get(Type::FloatTy, 4); @@ -84,7 +86,10 @@ Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicB m_llvmLit = 0; m_fmtPtr = 0; - createGallivmBuiltins(m_mod); + MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( + (const char*)&llvm_builtins_data[0], + (const char*)&llvm_builtins_data[Elements(llvm_builtins_data)-1]); + m_mod = ParseBitcodeFile(buffer); } llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) @@ -134,12 +139,12 @@ llvm::Value *Instructions::callFSqrt(llvm::Value *val) // predeclare the intrinsic std::vector<const Type*> fsqrtArgs; fsqrtArgs.push_back(Type::FloatTy); - ParamAttrsList *fsqrtPal = 0; + PAListPtr fsqrtPal; FunctionType* fsqrtType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fsqrtArgs, /*isVarArg=*/false); - m_llvmFSqrt = new Function( + m_llvmFSqrt = Function::Create( /*Type=*/fsqrtType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"llvm.sqrt.f32", m_mod); @@ -161,10 +166,9 @@ llvm::Value * Instructions::rsq(llvm::Value *in1) Value *abs = callFAbs(x); Value *sqrt = callFSqrt(abs); - Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, - APFloat(1.f)), - sqrt, - name("rsqrt")); + Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + sqrt, + name("rsqrt")); return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); } @@ -191,12 +195,12 @@ llvm::Value *Instructions::callFAbs(llvm::Value *val) // predeclare the intrinsic std::vector<const Type*> fabsArgs; fabsArgs.push_back(Type::FloatTy); - ParamAttrsList *fabsPal = 0; + PAListPtr fabsPal; FunctionType* fabsType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fabsArgs, /*isVarArg=*/false); - m_llvmFAbs = new Function( + m_llvmFAbs = Function::Create( /*Type=*/fabsType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"fabs", m_mod); @@ -234,12 +238,12 @@ llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) std::vector<const Type*> powArgs; powArgs.push_back(Type::FloatTy); powArgs.push_back(Type::FloatTy); - ParamAttrsList *powPal = 0; + PAListPtr powPal; FunctionType* powType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/powArgs, /*isVarArg=*/false); - m_llvmPow = new Function( + m_llvmPow = Function::Create( /*Type=*/powType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"llvm.pow.f32", m_mod); @@ -273,9 +277,8 @@ llvm::Value * Instructions::rcp(llvm::Value *in1) Value *x1 = m_builder.CreateExtractElement(in1, m_storage->constantInt(0), name("x1")); - Value *res = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, - APFloat(1.f)), - x1, name("rcp")); + Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + x1, name("rcp")); return vectorFromVals(res, res, res, res); } @@ -314,13 +317,13 @@ llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) m_storage->constantInt(3), name("w")); Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); - return vectorFromVals(ConstantFP::get(Type::FloatTy, APFloat(1.f)), + return vectorFromVals(ConstantFP::get(APFloat(1.f)), ry, z, w); } llvm::Value * Instructions::ex2(llvm::Value *in) { - llvm::Value *val = callPow(ConstantFP::get(Type::FloatTy, APFloat(2.f)), + llvm::Value *val = callPow(ConstantFP::get(APFloat(2.f)), m_builder.CreateExtractElement( in, m_storage->constantInt(0), name("x1"))); @@ -333,12 +336,12 @@ llvm::Value * Instructions::callFloor(llvm::Value *val) // predeclare the intrinsic std::vector<const Type*> floorArgs; floorArgs.push_back(Type::FloatTy); - ParamAttrsList *floorPal = 0; + PAListPtr floorPal; FunctionType* floorType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/floorArgs, /*isVarArg=*/false); - m_llvmFloor = new Function( + m_llvmFloor = Function::Create( /*Type=*/floorType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"floorf", m_mod); @@ -376,12 +379,12 @@ llvm::Value * Instructions::callFLog(llvm::Value *val) // predeclare the intrinsic std::vector<const Type*> flogArgs; flogArgs.push_back(Type::FloatTy); - ParamAttrsList *flogPal = 0; + PAListPtr flogPal; FunctionType* flogType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/flogArgs, /*isVarArg=*/false); - m_llvmFlog = new Function( + m_llvmFlog = Function::Create( /*Type=*/flogType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"logf", m_mod); @@ -504,12 +507,12 @@ void Instructions::printVector(llvm::Value *val) llvm::Function * Instructions::declarePrintf() { std::vector<const Type*> args; - ParamAttrsList *params = 0; + PAListPtr params; FunctionType* funcTy = FunctionType::get( /*Result=*/IntegerType::get(32), /*Params=*/args, /*isVarArg=*/true); - Function* func_printf = new Function( + Function* func_printf = Function::Create( /*Type=*/funcTy, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"printf", m_mod); @@ -521,7 +524,7 @@ llvm::Function * Instructions::declarePrintf() llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) { - Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); Constant *const0f = Constant::getNullValue(Type::FloatTy); std::vector<llvm::Value*> vec1 = extractVector(in1); @@ -542,7 +545,7 @@ llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) } llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) { - Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); Constant *const0f = Constant::getNullValue(Type::FloatTy); std::vector<llvm::Value*> vec1 = extractVector(in1); @@ -566,7 +569,7 @@ llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) { - Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); Constant *const0f = Constant::getNullValue(Type::FloatTy); std::vector<llvm::Value*> vec1 = extractVector(in1); @@ -633,8 +636,8 @@ llvm::Value * Instructions::abs(llvm::Value *in) void Instructions::ifop(llvm::Value *in) { - BasicBlock *ifthen = new BasicBlock(name("ifthen"), m_func,0); - BasicBlock *ifend = new BasicBlock(name("ifthenend"), m_func,0); + BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); + BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); @@ -660,7 +663,7 @@ llvm::BasicBlock * Instructions::currentBlock() const void Instructions::elseop() { assert(!m_ifStack.empty()); - BasicBlock *ifend = new BasicBlock(name("ifend"), m_func,0); + BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0); m_builder.CreateBr(ifend); m_builder.SetInsertPoint(m_ifStack.top()); currentBlock()->setName(name("ifelse")); @@ -687,8 +690,8 @@ llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, void Instructions::beginLoop() { - BasicBlock *begin = new BasicBlock(name("loop"), m_func,0); - BasicBlock *end = new BasicBlock(name("endloop"), m_func,0); + BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); + BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); m_builder.CreateBr(begin); Loop loop; @@ -711,7 +714,7 @@ void Instructions::endLoop() void Instructions::brk() { assert(!m_loopStack.empty()); - BasicBlock *unr = new BasicBlock(name("unreachable"), m_func,0); + BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); m_builder.CreateBr(m_loopStack.top().end); m_builder.SetInsertPoint(unr); } @@ -760,13 +763,13 @@ llvm::Function * Instructions::declareFunc(int label) args.push_back(vecPtr); args.push_back(vecPtr); args.push_back(vecPtr); - ParamAttrsList *params = 0; + PAListPtr params; FunctionType *funcType = FunctionType::get( /*Result=*/Type::VoidTy, /*Params=*/args, /*isVarArg=*/false); std::string name = createFuncName(label); - Function *func = new Function( + Function *func = Function::Create( /*Type=*/funcType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/name.c_str(), m_mod); @@ -784,7 +787,7 @@ void Instructions::bgnSub(unsigned label) ptr_INPUT->setName("INPUT"); m_storage->pushArguments(ptr_INPUT); - llvm::BasicBlock *entry = new BasicBlock("entry", func, 0); + llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); m_func = func; m_builder.SetInsertPoint(entry); @@ -809,10 +812,10 @@ llvm::Function * Instructions::findFunction(int label) llvm::Value * Instructions::constVector(float x, float y, float z, float w) { std::vector<Constant*> vec(4); - vec[0] = ConstantFP::get(Type::FloatTy, APFloat(x)); - vec[1] = ConstantFP::get(Type::FloatTy, APFloat(y)); - vec[2] = ConstantFP::get(Type::FloatTy, APFloat(z)); - vec[3] = ConstantFP::get(Type::FloatTy, APFloat(w)); + vec[0] = ConstantFP::get(APFloat(x)); + vec[1] = ConstantFP::get(APFloat(y)); + vec[2] = ConstantFP::get(APFloat(z)); + vec[3] = ConstantFP::get(APFloat(w)); return ConstantVector::get(m_floatVecType, vec); } diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index 9ebc17dd8e..19ca84ddc6 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -36,7 +36,7 @@ #include <llvm/BasicBlock.h> #include <llvm/Module.h> #include <llvm/Value.h> -#include <llvm/Support/LLVMBuilder.h> +#include <llvm/Support/IRBuilder.h> #include <map> #include <stack> @@ -125,7 +125,7 @@ private: llvm::Module *m_mod; llvm::Function *m_func; char m_name[32]; - llvm::LLVMFoldingBuilder m_builder; + llvm::IRBuilder m_builder; int m_idx; llvm::VectorType *m_floatVecType; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 6f83b56a72..76049ade7c 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -1,8 +1,35 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #include "instructionssoa.h" #include "storagesoa.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_util.h" #include <llvm/CallingConv.h> #include <llvm/Constants.h> @@ -10,7 +37,10 @@ #include <llvm/Function.h> #include <llvm/Instructions.h> #include <llvm/Transforms/Utils/Cloning.h> -#include <llvm/ParamAttrsList.h> +#include <llvm/ParameterAttributes.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/Bitcode/ReaderWriter.h> + #include <iostream> @@ -143,16 +173,56 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) void InstructionsSoa::createFunctionMap() { + m_functionsMap[TGSI_OPCODE_ABS] = "abs"; m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; + m_functionsMap[TGSI_OPCODE_MIN] = "min"; + m_functionsMap[TGSI_OPCODE_MAX] = "max"; m_functionsMap[TGSI_OPCODE_POWER] = "pow"; + m_functionsMap[TGSI_OPCODE_LIT] = "lit"; + m_functionsMap[TGSI_OPCODE_RSQ] = "rsq"; } void InstructionsSoa::createDependencies() { - std::vector<std::string> powDeps(1); - powDeps[0] = "powf"; - m_builtinDependencies["pow"] = powDeps; + { + std::vector<std::string> powDeps(2); + powDeps[0] = "powf"; + powDeps[1] = "powvec"; + m_builtinDependencies["pow"] = powDeps; + } + { + std::vector<std::string> absDeps(2); + absDeps[0] = "fabsf"; + absDeps[1] = "absvec"; + m_builtinDependencies["abs"] = absDeps; + } + { + std::vector<std::string> maxDeps(1); + maxDeps[0] = "maxvec"; + m_builtinDependencies["max"] = maxDeps; + } + { + std::vector<std::string> minDeps(1); + minDeps[0] = "minvec"; + m_builtinDependencies["min"] = minDeps; + } + { + std::vector<std::string> litDeps(4); + litDeps[0] = "minvec"; + litDeps[1] = "maxvec"; + litDeps[2] = "powf"; + litDeps[3] = "powvec"; + m_builtinDependencies["lit"] = litDeps; + } + { + std::vector<std::string> rsqDeps(4); + rsqDeps[0] = "sqrtf"; + rsqDeps[1] = "sqrtvec"; + rsqDeps[2] = "fabsf"; + rsqDeps[3] = "absvec"; + m_builtinDependencies["rsq"] = rsqDeps; + } } llvm::Function * InstructionsSoa::function(int op) @@ -162,9 +232,13 @@ llvm::Function * InstructionsSoa::function(int op) std::string name = m_functionsMap[op]; + std::cout <<"For op = "<<op<<", func is '"<<name<<"'"<<std::endl; + std::vector<std::string> deps = m_builtinDependencies[name]; for (unsigned int i = 0; i < deps.size(); ++i) { - injectFunction(m_builtins->getFunction(deps[i])); + llvm::Function *func = m_builtins->getFunction(deps[i]); + std::cout <<"\tinjecting dep = '"<<func->getName()<<"'"<<std::endl; + injectFunction(func); } llvm::Function *originalFunc = m_builtins->getFunction(name); @@ -183,10 +257,22 @@ llvm::Module * InstructionsSoa::currentModule() const void InstructionsSoa::createBuiltins() { - m_builtins = createSoaBuiltins(); + MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( + (const char*)&soabuiltins_data[0], + (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); + m_builtins = ParseBitcodeFile(buffer); + std::cout<<"Builtins created at "<<m_builtins<<std::endl; + assert(m_builtins); createDependencies(); } + +std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> in1) +{ + llvm::Function *func = function(TGSI_OPCODE_ABS); + return callBuiltin(func, in1); +} + std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2) { @@ -204,32 +290,32 @@ llvm::Value * InstructionsSoa::allocaTemp() std::vector<Value*> indices; indices.push_back(m_storage->constantInt(0)); indices.push_back(m_storage->constantInt(0)); - GetElementPtrInst *getElem = new GetElementPtrInst(alloca, - indices.begin(), - indices.end(), - name("allocaPtr"), - m_builder.GetInsertBlock()); + GetElementPtrInst *getElem = GetElementPtrInst::Create(alloca, + indices.begin(), + indices.end(), + name("allocaPtr"), + m_builder.GetInsertBlock()); return getElem; } std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr) { - GetElementPtrInst *xElemPtr = new GetElementPtrInst(allocaPtr, - m_storage->constantInt(0), - name("xPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *yElemPtr = new GetElementPtrInst(allocaPtr, - m_storage->constantInt(1), - name("yPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *zElemPtr = new GetElementPtrInst(allocaPtr, - m_storage->constantInt(2), - name("zPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *wElemPtr = new GetElementPtrInst(allocaPtr, - m_storage->constantInt(3), - name("wPtr"), - m_builder.GetInsertBlock()); + GetElementPtrInst *xElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(0), + name("xPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *yElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(1), + name("yPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *zElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(2), + name("zPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *wElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(3), + name("wPtr"), + m_builder.GetInsertBlock()); std::vector<llvm::Value*> res(4); res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock()); @@ -320,6 +406,21 @@ std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> i return callBuiltin(func, in1, in2); } +std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MIN); + return callBuiltin(func, in1, in2); +} + + +std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MAX); + return callBuiltin(func, in1, in2); +} + void checkFunction(Function *func) { for (Function::const_iterator BI = func->begin(), BE = func->end(); @@ -354,29 +455,56 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) } llvm::Function *func = 0; if (originalFunc->isDeclaration()) { - std::cout << "function decleration" <<std::endl; - func = new Function(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage, - originalFunc->getName(), currentModule()); + func = Function::Create(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage, + originalFunc->getName(), currentModule()); func->setCallingConv(CallingConv::C); - const ParamAttrsList *pal = 0; + const PAListPtr pal; func->setParamAttrs(pal); currentModule()->dump(); } else { DenseMap<const Value*, Value *> val; + val[m_builtins->getFunction("fabsf")] = currentModule()->getFunction("fabsf"); val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf"); + val[m_builtins->getFunction("sqrtf")] = currentModule()->getFunction("sqrtf"); + func = CloneFunction(originalFunc, val); +#if 0 std::cout <<" replacing "<<m_builtins->getFunction("powf") <<", with " <<currentModule()->getFunction("powf")<<std::endl; - func = CloneFunction(originalFunc, val); std::cout<<"1111-------------------------------"<<std::endl; checkFunction(originalFunc); std::cout<<"2222-------------------------------"<<std::endl; checkFunction(func); std::cout <<"XXXX = " <<val[m_builtins->getFunction("powf")]<<std::endl; +#endif currentModule()->getFunctionList().push_back(func); - std::cout << "Func parent is "<<func->getParent() - <<", cur is "<<currentModule() <<std::endl; } if (op != TGSI_OPCODE_LAST) { m_functions[op] = func; } } + +std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); + res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); + res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); + res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_LIT); + return callBuiltin(func, in); +} + +std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_RSQ); + return callBuiltin(func, in); +} + diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index b9104ea286..3e20b652dd 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -29,7 +29,7 @@ #define INSTRUCTIONSSOA_H #include <pipe/p_shader_tokens.h> -#include <llvm/Support/LLVMBuilder.h> +#include <llvm/Support/IRBuilder.h> #include <map> #include <vector> @@ -48,6 +48,7 @@ public: InstructionsSoa(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, StorageSoa *storage); + std::vector<llvm::Value*> abs(const std::vector<llvm::Value*> in1); std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in); std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2); @@ -55,13 +56,21 @@ public: const std::vector<llvm::Value*> in2); std::vector<llvm::Value*> dp4(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> lit(const std::vector<llvm::Value*> in); std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2, const std::vector<llvm::Value*> in3); + std::vector<llvm::Value*> max(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> min(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2); std::vector<llvm::Value*> pow(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> rsq(const std::vector<llvm::Value*> in1); + std::vector<llvm::Value*> sub(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); void end(); std::vector<llvm::Value*> extractVector(llvm::Value *vector); @@ -87,7 +96,7 @@ private: const std::vector<llvm::Value*> in3); void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST); private: - llvm::LLVMFoldingBuilder m_builder; + llvm::IRBuilder m_builder; StorageSoa *m_storage; std::map<int, std::string> m_functionsMap; diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c index 4f98d754ba..6b9d626ed4 100644 --- a/src/gallium/auxiliary/gallivm/llvm_builtins.c +++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c @@ -1,4 +1,3 @@ -/*clang --emit-llvm llvm_builtins.c |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=gallivm_builtins.cpp -f -for=shader -funcname=createGallivmBuiltins*/ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. @@ -30,7 +29,7 @@ * Authors: * Zack Rusin zack@tungstengraphics.com */ -typedef __attribute__(( ocu_vector_type(4) )) float float4; +typedef __attribute__(( ext_vector_type(4) )) float float4; extern float powf(float a, float b); diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 4d658be520..78f84510e2 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -31,7 +31,30 @@ * Authors: * Zack Rusin zack@tungstengraphics.com */ -typedef __attribute__(( ocu_vector_type(4) )) float float4; +typedef __attribute__(( ext_vector_type(4) )) float float4; + + +extern float fabsf(float val); + +float4 absvec(float4 vec) +{ + float4 res; + res.x = fabsf(vec.x); + res.y = fabsf(vec.y); + res.z = fabsf(vec.z); + res.w = fabsf(vec.w); + + return res; +} + +void abs(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + res[0] = absvec(tmp0x); + res[1] = absvec(tmp0y); + res[2] = absvec(tmp0z); + res[3] = absvec(tmp0w); +} void dp3(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, @@ -61,30 +84,104 @@ void dp4(float4 *res, } extern float powf(float num, float p); +extern float sqrtf(float x); + +float4 powvec(float4 vec, float4 q) +{ + float4 p; + p.x = powf(vec.x, q.x); + p.y = powf(vec.y, q.y); + p.z = powf(vec.z, q.z); + p.w = powf(vec.w, q.w); + return p; +} void pow(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) { - float4 p; - p.x = powf(tmp0x.x, tmp1x.x); - p.y = powf(tmp0x.y, tmp1x.y); - p.z = powf(tmp0x.z, tmp1x.z); - p.w = powf(tmp0x.w, tmp1x.w); - - res[0] = p; - res[1] = p; - res[2] = p; - res[3] = p; + res[0] = powvec(tmp0x, tmp1x); + res[1] = res[0]; + res[2] = res[0]; + res[3] = res[0]; +} + +float4 minvec(float4 a, float4 b) +{ + return (float4){(a.x < b.x) ? a.x : b.x, + (a.y < b.y) ? a.y : b.y, + (a.z < b.z) ? a.z : b.z, + (a.w < b.w) ? a.w : b.w}; +} + +void min(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = minvec(tmp0x, tmp1x); + res[1] = minvec(tmp0y, tmp1y); + res[2] = minvec(tmp0z, tmp1z); + res[3] = minvec(tmp0w, tmp1w); +} + + +float4 maxvec(float4 a, float4 b) +{ + return (float4){(a.x > b.x) ? a.x : b.x, + (a.y > b.y) ? a.y : b.y, + (a.z > b.z) ? a.z : b.z, + (a.w > b.w) ? a.w : b.w}; +} + +void max(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = maxvec(tmp0x, tmp1x); + res[1] = maxvec(tmp0y, tmp1y); + res[2] = maxvec(tmp0z, tmp1z); + res[3] = maxvec(tmp0w, tmp1w); +} + + +void lit(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; + const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; + const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; + + res[0] = (float4){1.0, 1.0, 1.0, 1.0}; + if (tmp0x.x > 0) { + float4 tmpy = maxvec(tmp0y, zerovec); + float4 tmpw = minvec(tmp0w, plus128); + tmpw = maxvec(tmpw, min128); + res[1] = tmp0x; + res[2] = powvec(tmpy, tmpw); + } else { + res[1] = zerovec; + res[2] = zerovec; + } + res[3] = (float4){1.0, 1.0, 1.0, 1.0}; } -#if 0 -void yo(float4 *out, float4 *in) + +float4 sqrtvec(float4 vec) { - float4 res[4]; + float4 p; + p.x = sqrtf(vec.x); + p.y = sqrtf(vec.y); + p.z = sqrtf(vec.z); + p.w = sqrtf(vec.w); + return p; +} - dp3(res, in[0], in[1], in[2], in[3], - in[4], in[5], in[6], in[7]); - out[1] = res[1]; +void rsq(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + const float4 onevec = (float4) {1., 1., 1., 1.}; + res[0] = onevec/sqrtvec(absvec(tmp0x)); + res[1] = onevec/sqrtvec(absvec(tmp0y)); + res[2] = onevec/sqrtvec(absvec(tmp0z)); + res[3] = onevec/sqrtvec(absvec(tmp0w)); } -#endif diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp index c4326de8c5..6f373f6dd5 100644 --- a/src/gallium/auxiliary/gallivm/storage.cpp +++ b/src/gallium/auxiliary/gallivm/storage.cpp @@ -69,10 +69,10 @@ llvm::Constant *Storage::shuffleMask(int vec) { if (!m_extSwizzleVec) { std::vector<Constant*> elems; - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); + elems.push_back(ConstantFP::get(APFloat(0.f))); + elems.push_back(ConstantFP::get(APFloat(1.f))); + elems.push_back(ConstantFP::get(APFloat(0.f))); + elems.push_back(ConstantFP::get(APFloat(1.f))); m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems); } @@ -186,26 +186,26 @@ llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ) if ((mask & TGSI_WRITEMASK_X)) { llvm::Value *x = new ExtractElementInst(src, unsigned(0), name("x"), m_block); - dst = new InsertElementInst(dst, x, unsigned(0), - name("dstx"), m_block); + dst = InsertElementInst::Create(dst, x, unsigned(0), + name("dstx"), m_block); } if ((mask & TGSI_WRITEMASK_Y)) { llvm::Value *y = new ExtractElementInst(src, unsigned(1), name("y"), m_block); - dst = new InsertElementInst(dst, y, unsigned(1), - name("dsty"), m_block); + dst = InsertElementInst::Create(dst, y, unsigned(1), + name("dsty"), m_block); } if ((mask & TGSI_WRITEMASK_Z)) { llvm::Value *z = new ExtractElementInst(src, unsigned(2), name("z"), m_block); - dst = new InsertElementInst(dst, z, unsigned(2), - name("dstz"), m_block); + dst = InsertElementInst::Create(dst, z, unsigned(2), + name("dstz"), m_block); } if ((mask & TGSI_WRITEMASK_W)) { llvm::Value *w = new ExtractElementInst(src, unsigned(3), name("w"), m_block); - dst = new InsertElementInst(dst, w, unsigned(3), - name("dstw"), m_block); + dst = InsertElementInst::Create(dst, w, unsigned(3), + name("dstw"), m_block); } return dst; } @@ -295,10 +295,10 @@ llvm::Value * Storage::immediateElement(int idx) void Storage::addImmediate(float *val) { std::vector<Constant*> vec(4); - vec[0] = ConstantFP::get(Type::FloatTy, APFloat(val[0])); - vec[1] = ConstantFP::get(Type::FloatTy, APFloat(val[1])); - vec[2] = ConstantFP::get(Type::FloatTy, APFloat(val[2])); - vec[3] = ConstantFP::get(Type::FloatTy, APFloat(val[3])); + vec[0] = ConstantFP::get(APFloat(val[0])); + vec[1] = ConstantFP::get(APFloat(val[1])); + vec[2] = ConstantFP::get(APFloat(val[2])); + vec[3] = ConstantFP::get(APFloat(val[3])); m_immediates.push_back(ConstantVector::get(m_floatVecType, vec)); } @@ -308,11 +308,11 @@ llvm::Value * Storage::elemPtr(Args arg) std::vector<Value*> indices; indices.push_back(constantInt(0)); indices.push_back(constantInt(static_cast<int>(arg))); - GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT, - indices.begin(), - indices.end(), - name("input_ptr"), - m_block); + GetElementPtrInst *getElem = GetElementPtrInst::Create(m_INPUT, + indices.begin(), + indices.end(), + name("input_ptr"), + m_block); return new LoadInst(getElem, name("input_field"), false, m_block); } @@ -322,7 +322,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, GetElementPtrInst *getElem = 0; if (indIdx) { - getElem = new GetElementPtrInst(ptr, + getElem = GetElementPtrInst::Create(ptr, BinaryOperator::create(Instruction::Add, indIdx, constantInt(idx), @@ -331,7 +331,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, name("field"), m_block); } else { - getElem = new GetElementPtrInst(ptr, + getElem = GetElementPtrInst::Create(ptr, constantInt(idx), name("field"), m_block); @@ -350,7 +350,7 @@ void Storage::setKilElement(llvm::Value *val) std::vector<Value*> indices; indices.push_back(constantInt(0)); indices.push_back(constantInt(static_cast<int>(KilArg))); - GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT, + GetElementPtrInst *elem = GetElementPtrInst::Create(m_INPUT, indices.begin(), indices.end(), name("kil_ptr"), diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index bb6fe3d7e1..78d754371f 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -207,11 +207,11 @@ llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index, indices.push_back(index); indices.push_back(constantInt(channel)); - GetElementPtrInst *getElem = new GetElementPtrInst(ptr, - indices.begin(), - indices.end(), - name("ptr"), - m_block); + GetElementPtrInst *getElem = GetElementPtrInst::Create(ptr, + indices.begin(), + indices.end(), + name("ptr"), + m_block); return getElem; } @@ -264,10 +264,10 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v { VectorType *vectorType = VectorType::get(Type::FloatTy, 4); std::vector<Constant*> immValues; - ConstantFP *constx = ConstantFP::get(Type::FloatTy, APFloat(vec[0])); - ConstantFP *consty = ConstantFP::get(Type::FloatTy, APFloat(vec[1])); - ConstantFP *constz = ConstantFP::get(Type::FloatTy, APFloat(vec[2])); - ConstantFP *constw = ConstantFP::get(Type::FloatTy, APFloat(vec[3])); + ConstantFP *constx = ConstantFP::get(APFloat(vec[0])); + ConstantFP *consty = ConstantFP::get(APFloat(vec[1])); + ConstantFP *constz = ConstantFP::get(APFloat(vec[2])); + ConstantFP *constw = ConstantFP::get(APFloat(vec[3])); immValues.push_back(constx); immValues.push_back(consty); immValues.push_back(constz); diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index ab9e7a06fb..9695358ab8 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -692,12 +692,14 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_LIT: { + out = instr->lit(inputs[0]); } break; case TGSI_OPCODE_RCP: { } break; case TGSI_OPCODE_RSQ: { + out = instr->rsq(inputs[0]); } break; case TGSI_OPCODE_EXP: @@ -724,9 +726,11 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_MIN: { + out = instr->min(inputs[0], inputs[1]); } break; case TGSI_OPCODE_MAX: { + out = instr->max(inputs[0], inputs[1]); } break; case TGSI_OPCODE_SLT: { @@ -740,6 +744,7 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_SUB: { + out = instr->sub(inputs[0], inputs[1]); } break; case TGSI_OPCODE_LERP: { @@ -781,6 +786,7 @@ translate_instructionir(llvm::Module *module, case TGSI_OPCODE_MULTIPLYMATRIX: break; case TGSI_OPCODE_ABS: { + out = instr->abs(inputs[0]); } break; case TGSI_OPCODE_RCC: @@ -1014,7 +1020,7 @@ tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) Value *ptr_INPUT = args++; ptr_INPUT->setName("input"); - BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); tgsi_parse_init(&parse, tokens); @@ -1085,7 +1091,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, Value *temps = args++; temps->setName("temps"); - BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); tgsi_parse_init(&parse, tokens); diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index a9fa518c67..d654dbcc91 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -6,9 +6,11 @@ LIBNAME = pipebuffer C_SOURCES = \ pb_buffer_fenced.c \ pb_buffer_malloc.c \ + pb_bufmgr_cache.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ pb_bufmgr_pool.c \ + pb_bufmgr_slab.c \ pb_winsys.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 3d41fd84a6..604a217982 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -5,9 +5,11 @@ pipebuffer = env.ConvenienceLibrary( source = [ 'pb_buffer_fenced.c', 'pb_buffer_malloc.c', + 'pb_bufmgr_cache.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', 'pb_bufmgr_pool.c', + 'pb_bufmgr_slab.c', 'pb_winsys.c', ]) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 4b09c80b2a..49705cb862 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -193,6 +193,17 @@ pb_reference(struct pb_buffer **dst, /** + * Utility function to check whether a requested alignment is consistent with + * the provided alignment or not. + */ +static INLINE int +pb_check_alignment(size_t requested, size_t provided) +{ + return requested <= provided && (provided % requested) == 0; +} + + +/** * Malloc-based buffer to store data that can't be used by the graphics * hardware. */ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 24ba61a0b7..2fa0842971 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -34,7 +34,14 @@ */ +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <unistd.h> +#endif + #include "pipe/p_compiler.h" +#include "pipe/p_error.h" #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" @@ -44,9 +51,6 @@ #include "pb_buffer.h" #include "pb_buffer_fenced.h" -#ifndef WIN32 -#include <unistd.h> -#endif /** @@ -54,6 +58,13 @@ */ #define SUPER(__derived) (&(__derived)->base) +#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE ) +#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ) +#define PIPE_BUFFER_USAGE_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE ) + struct fenced_buffer_list { @@ -76,6 +87,15 @@ struct fenced_buffer struct pb_buffer *buffer; + /* FIXME: protect access with mutex */ + + /** + * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current + * buffer usage. + */ + unsigned flags; + + unsigned mapcount; struct pipe_fence_handle *fence; struct list_head head; @@ -97,7 +117,10 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf) { struct fenced_buffer_list *fenced_list = fenced_buf->list; + assert(fenced_buf->base.base.refcount); + assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); + assert(!fenced_buf->head.prev); assert(!fenced_buf->head.next); LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); @@ -111,8 +134,6 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf) static INLINE void _fenced_buffer_destroy(struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; - assert(!fenced_buf->base.base.refcount); assert(!fenced_buf->fence); pb_reference(&fenced_buf->buffer, NULL); @@ -128,8 +149,8 @@ _fenced_buffer_remove(struct fenced_buffer *fenced_buf) assert(fenced_buf->fence); - assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); winsys->fence_reference(winsys, &fenced_buf->fence, NULL); + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; assert(fenced_buf->head.prev); assert(fenced_buf->head.next); @@ -174,6 +195,9 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, break; prev_fence = fenced_buf->fence; } + else { + assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); + } _fenced_buffer_remove(fenced_buf); @@ -183,6 +207,40 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, } +/** + * Serialize writes, but allow concurrent reads. + */ +static INLINE enum pipe_error +fenced_buffer_serialize(struct fenced_buffer *fenced_buf, unsigned flags) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + struct pipe_winsys *winsys = fenced_list->winsys; + + /* Allow concurrent reads */ + if(((fenced_buf->flags | flags) & PIPE_BUFFER_USAGE_WRITE) == 0) + return PIPE_OK; + + /* Wait for the CPU to finish */ + if(fenced_buf->mapcount) { + /* FIXME: Use thread conditions variables to signal when mapcount + * reaches zero */ + debug_warning("attemp to write concurrently to buffer"); + /* XXX: we must not fail here in order to support texture mipmap generation + return PIPE_ERROR_RETRY; + */ + } + + /* Wait for the GPU to finish */ + if(fenced_buf->fence) { + if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) + return PIPE_ERROR_RETRY; + _fenced_buffer_remove(fenced_buf); + } + + return PIPE_OK; +} + + static void fenced_buffer_destroy(struct pb_buffer *buf) { @@ -199,6 +257,7 @@ fenced_buffer_destroy(struct pb_buffer *buf) prev = curr->prev; do { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); _fenced_buffer_remove(fenced_buf); curr = prev; prev = curr->prev; @@ -219,16 +278,30 @@ static void * fenced_buffer_map(struct pb_buffer *buf, unsigned flags) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); - return pb_map(fenced_buf->buffer, flags); + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + void *map; + assert((flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE) == 0); + + if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) + return NULL; + + map = pb_map(fenced_buf->buffer, flags); + if(map) + ++fenced_buf->mapcount; + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; + return map; } static void fenced_buffer_unmap(struct pb_buffer *buf) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + assert(fenced_buf->mapcount); pb_unmap(fenced_buf->buffer); + --fenced_buf->mapcount; + if(!fenced_buf->mapcount) + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } @@ -281,16 +354,49 @@ void buffer_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); - struct fenced_buffer_list *fenced_list = fenced_buf->list; - struct pipe_winsys *winsys = fenced_list->winsys; + struct fenced_buffer *fenced_buf; + struct fenced_buffer_list *fenced_list; + struct pipe_winsys *winsys; + /* FIXME: receive this as a parameter */ + unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; + + /* This is a public function, so be extra cautious with the buffer passed, + * as happens frequently to receive null buffers, or pointer to buffers + * other than fenced buffers. */ + assert(buf); + if(!buf) + return; + assert(buf->vtbl == &fenced_buffer_vtbl); + if(buf->vtbl != &fenced_buffer_vtbl) + return; + + fenced_buf = fenced_buffer(buf); + fenced_list = fenced_buf->list; + winsys = fenced_list->winsys; + + if(fence == fenced_buf->fence) { + /* Handle the same fence case specially, not only because it is a fast + * path, but mostly to avoid serializing two writes with the same fence, + * as that would bring the hardware down to synchronous operation without + * any benefit. + */ + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; + return; + } + + if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) { + /* FIXME: propagate error */ + (void)0; + } _glthread_LOCK_MUTEX(fenced_list->mutex); if (fenced_buf->fence) _fenced_buffer_remove(fenced_buf); - winsys->fence_reference(winsys, &fenced_buf->fence, fence); - if (fenced_buf->fence) + if (fence) { + winsys->fence_reference(winsys, &fenced_buf->fence, fence); + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; _fenced_buffer_add(fenced_buf); + } _glthread_UNLOCK_MUTEX(fenced_list->mutex); } @@ -334,7 +440,7 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) /* Wait on outstanding fences */ while (fenced_list->numDelayed) { _glthread_UNLOCK_MUTEX(fenced_list->mutex); -#ifndef WIN32 +#if defined(PIPE_OS_LINUX) sched_yield(); #endif _fenced_buffer_list_check_free(fenced_list, 1); diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 0cf8e92e37..8de286e3f9 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -80,7 +80,7 @@ struct pb_manager /** - * Static buffer pool manager. + * Static buffer pool sub-allocator. * * Manages the allocation of equally sized buffers. It does so by allocating * a single big buffer and divide it equally sized buffers. @@ -94,7 +94,7 @@ pool_bufmgr_create(struct pb_manager *provider, /** - * Wraper around the old memory manager. + * Static sub-allocator based the old memory manager. * * It managers buffers of different sizes. It does so by allocating a buffer * with the size of the heap, and then using the old mm memory manager to manage @@ -114,6 +114,40 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, size_t size, size_t align2); +/** + * Slab sub-allocator. + */ +struct pb_manager * +pb_slab_manager_create(struct pb_manager *provider, + size_t bufSize, + size_t slabSize, + const struct pb_desc *desc); + +/** + * Allow a range of buffer size, by aggregating multiple slabs sub-allocators + * with different bucket sizes. + */ +struct pb_manager * +pb_slab_range_manager_create(struct pb_manager *provider, + size_t minBufSize, + size_t maxBufSize, + size_t slabSize, + const struct pb_desc *desc); + + +/** + * Time-based buffer cache. + * + * This manager keeps a cache of destroyed buffers during a time interval. + */ +struct pb_manager * +pb_cache_manager_create(struct pb_manager *provider, + unsigned usecs); + +void +pb_cache_flush(struct pb_manager *mgr); + + /** * Fenced buffer manager. * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c new file mode 100644 index 0000000000..4bd3f94a6c --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -0,0 +1,344 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer cache. + * + * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com> + * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" +#include "util/u_time.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct pb_cache_manager; + + +/** + * Wrapper around a pipe buffer which adds delayed destruction. + */ +struct pb_cache_buffer +{ + struct pb_buffer base; + + struct pb_buffer *buffer; + struct pb_cache_manager *mgr; + + /** Caching time interval */ + struct util_time start, end; + + struct list_head head; +}; + + +struct pb_cache_manager +{ + struct pb_manager base; + + struct pb_manager *provider; + unsigned usecs; + + _glthread_Mutex mutex; + + struct list_head delayed; + size_t numDelayed; +}; + + +static INLINE struct pb_cache_buffer * +pb_cache_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pb_cache_buffer *)buf; +} + + +static INLINE struct pb_cache_manager * +pb_cache_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_cache_manager *)mgr; +} + + +/** + * Actually destroy the buffer. + */ +static INLINE void +_pb_cache_buffer_destroy(struct pb_cache_buffer *buf) +{ + struct pb_cache_manager *mgr = buf->mgr; + + LIST_DEL(&buf->head); + assert(mgr->numDelayed); + --mgr->numDelayed; + assert(!buf->base.base.refcount); + pb_reference(&buf->buffer, NULL); + FREE(buf); +} + + +/** + * Free as many cache buffers from the list head as possible. + */ +static void +_pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr) +{ + struct list_head *curr, *next; + struct pb_cache_buffer *buf; + struct util_time now; + + util_time_get(&now); + + curr = mgr->delayed.next; + next = curr->next; + while(curr != &mgr->delayed) { + buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + + if(!util_time_timeout(&buf->start, &buf->end, &now)) + break; + + _pb_cache_buffer_destroy(buf); + + curr = next; + next = curr->next; + } +} + + +static void +pb_cache_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + struct pb_cache_manager *mgr = buf->mgr; + + _glthread_LOCK_MUTEX(mgr->mutex); + assert(buf->base.base.refcount == 0); + + _pb_cache_buffer_list_check_free(mgr); + + util_time_get(&buf->start); + util_time_add(&buf->start, mgr->usecs, &buf->end); + LIST_ADDTAIL(&buf->head, &mgr->delayed); + ++mgr->numDelayed; + _glthread_UNLOCK_MUTEX(mgr->mutex); +} + + +static void * +pb_cache_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + return pb_map(buf->buffer, flags); +} + + +static void +pb_cache_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_unmap(buf->buffer); +} + + +static void +pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_get_base_buffer(buf->buffer, base_buf, offset); +} + + +const struct pb_vtbl +pb_cache_buffer_vtbl = { + pb_cache_buffer_destroy, + pb_cache_buffer_map, + pb_cache_buffer_unmap, + pb_cache_buffer_get_base_buffer +}; + + +static INLINE boolean +pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, + size_t size, + const struct pb_desc *desc) +{ + /* TODO: be more lenient with size */ + if(buf->base.base.size != size) + return FALSE; + + if(!pb_check_alignment(desc->alignment, buf->base.base.alignment)) + return FALSE; + + /* XXX: check usage too? */ + + return TRUE; +} + + +static struct pb_buffer * +pb_cache_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_cache_manager *mgr = pb_cache_manager(_mgr); + struct pb_cache_buffer *buf; + struct pb_cache_buffer *curr_buf; + struct list_head *curr, *next; + struct util_time now; + + _glthread_LOCK_MUTEX(mgr->mutex); + + buf = NULL; + curr = mgr->delayed.next; + next = curr->next; + + /* search in the expired buffers, freeing them in the process */ + util_time_get(&now); + while(curr != &mgr->delayed) { + curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc)) + buf = curr_buf; + else if(util_time_timeout(&curr_buf->start, &curr_buf->end, &now)) + _pb_cache_buffer_destroy(curr_buf); + curr = next; + next = curr->next; + } + + /* keep searching in the hot buffers */ + while(!buf && curr != &mgr->delayed) { + curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + if(pb_cache_is_buffer_compat(curr_buf, size, desc)) + buf = curr_buf; + curr = next; + next = curr->next; + } + + if(buf) { + LIST_DEL(&buf->head); + _glthread_UNLOCK_MUTEX(mgr->mutex); + ++buf->base.base.refcount; + return &buf->base; + } + + _glthread_UNLOCK_MUTEX(mgr->mutex); + + buf = CALLOC_STRUCT(pb_cache_buffer); + if(!buf) + return NULL; + + buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc); + if(!buf->buffer) { + FREE(buf); + return NULL; + } + + assert(buf->buffer->base.refcount >= 1); + assert(pb_check_alignment(desc->alignment, buf->buffer->base.alignment)); + assert((buf->buffer->base.usage & desc->usage) == desc->usage); + assert(buf->buffer->base.size >= size); + + buf->base.base.refcount = 1; + buf->base.base.alignment = buf->buffer->base.alignment; + buf->base.base.usage = buf->buffer->base.usage; + buf->base.base.size = buf->buffer->base.size; + + buf->base.vtbl = &pb_cache_buffer_vtbl; + buf->mgr = mgr; + + return &buf->base; +} + + +void +pb_cache_flush(struct pb_manager *_mgr) +{ + struct pb_cache_manager *mgr = pb_cache_manager(_mgr); + struct list_head *curr, *next; + struct pb_cache_buffer *buf; + + _glthread_LOCK_MUTEX(mgr->mutex); + curr = mgr->delayed.next; + next = curr->next; + while(curr != &mgr->delayed) { + buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + _pb_cache_buffer_destroy(buf); + curr = next; + next = curr->next; + } + _glthread_UNLOCK_MUTEX(mgr->mutex); +} + + +static void +pb_cache_manager_destroy(struct pb_manager *mgr) +{ + pb_cache_flush(mgr); + FREE(mgr); +} + + +struct pb_manager * +pb_cache_manager_create(struct pb_manager *provider, + unsigned usecs) +{ + struct pb_cache_manager *mgr; + + mgr = (struct pb_cache_manager *)CALLOC(1, sizeof(*mgr)); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_cache_manager_destroy; + mgr->base.create_buffer = pb_cache_manager_create_buffer; + mgr->provider = provider; + mgr->usecs = usecs; + LIST_INITHEAD(&mgr->delayed); + mgr->numDelayed = 0; + _glthread_INIT_MUTEX(mgr->mutex); + + return &mgr->base; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index bffca5b244..9d809e2f9b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -30,7 +30,7 @@ * \file * A buffer manager that wraps buffers in fenced buffers. * - * \author José Fonseca <jrfonseca@tungstengraphics.dot.com> + * \author José Fonseca <jrfonseca@tungstengraphics.dot.com> */ @@ -101,7 +101,8 @@ fenced_bufmgr_destroy(struct pb_manager *mgr) fenced_buffer_list_destroy(fenced_mgr->fenced_list); - fenced_mgr->provider->destroy(fenced_mgr->provider); + if(fenced_mgr->provider) + fenced_mgr->provider->destroy(fenced_mgr->provider); FREE(fenced_mgr); } @@ -113,6 +114,9 @@ fenced_bufmgr_create(struct pb_manager *provider, { struct fenced_pb_manager *fenced_mgr; + if(!provider) + return NULL; + fenced_mgr = (struct fenced_pb_manager *)CALLOC(1, sizeof(*fenced_mgr)); if (!fenced_mgr) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c new file mode 100644 index 0000000000..b931455056 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -0,0 +1,495 @@ +/************************************************************************** + * + * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA + * All Rights Reserved. + * + * Permission is hereby granted, FREE of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * S-lab pool implementation. + * + * @author Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" +#include "util/u_time.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +#define DRI_SLABPOOL_ALLOC_RETRIES 100 + + +struct pb_slab; + +struct pb_slab_buffer +{ + struct pb_buffer base; + + struct pb_slab *slab; + struct list_head head; + unsigned mapCount; + size_t start; + _glthread_Cond event; +}; + +struct pb_slab +{ + struct list_head head; + struct list_head freeBuffers; + size_t numBuffers; + size_t numFree; + struct pb_slab_buffer *buffers; + struct pb_slab_manager *mgr; + + struct pb_buffer *bo; + void *virtual; +}; + +struct pb_slab_manager +{ + struct pb_manager base; + + struct pb_manager *provider; + size_t bufSize; + size_t slabSize; + struct pb_desc desc; + + struct list_head slabs; + struct list_head freeSlabs; + + _glthread_Mutex mutex; +}; + +/** + * The data of this structure remains constant after + * initialization and thus needs no mutex protection. + */ +struct pb_slab_range_manager +{ + struct pb_manager base; + + struct pb_manager *provider; + size_t minBufSize; + size_t maxBufSize; + struct pb_desc desc; + + unsigned numBuckets; + size_t *bucketSizes; + struct pb_manager **buckets; +}; + + +static INLINE struct pb_slab_buffer * +pb_slab_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pb_slab_buffer *)buf; +} + + +static INLINE struct pb_slab_manager * +pb_slab_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_slab_manager *)mgr; +} + + +static INLINE struct pb_slab_range_manager * +pb_slab_range_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_slab_range_manager *)mgr; +} + + +/** + * Delete a buffer from the slab delayed list and put + * it on the slab FREE list. + */ +static void +pb_slab_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + struct pb_slab *slab = buf->slab; + struct pb_slab_manager *mgr = slab->mgr; + struct list_head *list = &buf->head; + + _glthread_LOCK_MUTEX(mgr->mutex); + + assert(buf->base.base.refcount == 0); + + buf->mapCount = 0; + + LIST_DEL(list); + LIST_ADDTAIL(list, &slab->freeBuffers); + slab->numFree++; + + if (slab->head.next == &slab->head) + LIST_ADDTAIL(&slab->head, &mgr->slabs); + + if (slab->numFree == slab->numBuffers) { + list = &slab->head; + LIST_DEL(list); + LIST_ADDTAIL(list, &mgr->freeSlabs); + } + + if (mgr->slabs.next == &mgr->slabs || slab->numFree + != slab->numBuffers) { + + struct list_head *next; + + for (list = mgr->freeSlabs.next, next = list->next; list + != &mgr->freeSlabs; list = next, next = list->next) { + + slab = LIST_ENTRY(struct pb_slab, list, head); + + LIST_DELINIT(list); + pb_reference(&slab->bo, NULL); + FREE(slab->buffers); + FREE(slab); + } + } + + _glthread_UNLOCK_MUTEX(mgr->mutex); +} + + +static void * +pb_slab_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + + ++buf->mapCount; + return (void *) ((uint8_t *) buf->slab->virtual + buf->start); +} + + +static void +pb_slab_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + + --buf->mapCount; + if (buf->mapCount == 0) + _glthread_COND_BROADCAST(buf->event); +} + + +static void +pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + pb_get_base_buffer(buf->slab->bo, base_buf, offset); + *offset += buf->start; +} + + +static const struct pb_vtbl +pb_slab_buffer_vtbl = { + pb_slab_buffer_destroy, + pb_slab_buffer_map, + pb_slab_buffer_unmap, + pb_slab_buffer_get_base_buffer +}; + + +static enum pipe_error +pb_slab_create(struct pb_slab_manager *mgr) +{ + struct pb_slab *slab; + struct pb_slab_buffer *buf; + unsigned numBuffers; + unsigned i; + enum pipe_error ret; + + slab = CALLOC_STRUCT(pb_slab); + if (!slab) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* + * FIXME: We should perhaps allow some variation in slabsize in order + * to efficiently reuse slabs. + */ + + slab->bo = mgr->provider->create_buffer(mgr->provider, mgr->slabSize, &mgr->desc); + if(!slab->bo) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out_err0; + } + + slab->virtual = pb_map(slab->bo, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); + if(!slab->virtual) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out_err1; + } + + pb_unmap(slab->bo); + + numBuffers = slab->bo->base.size / mgr->bufSize; + + slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers)); + if (!slab->buffers) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out_err1; + } + + LIST_INITHEAD(&slab->head); + LIST_INITHEAD(&slab->freeBuffers); + slab->numBuffers = numBuffers; + slab->numFree = 0; + slab->mgr = mgr; + + buf = slab->buffers; + for (i=0; i < numBuffers; ++i) { + buf->base.base.refcount = 0; + buf->base.base.size = mgr->bufSize; + buf->base.base.alignment = 0; + buf->base.base.usage = 0; + buf->base.vtbl = &pb_slab_buffer_vtbl; + buf->slab = slab; + buf->start = i* mgr->bufSize; + buf->mapCount = 0; + _glthread_INIT_COND(buf->event); + LIST_ADDTAIL(&buf->head, &slab->freeBuffers); + slab->numFree++; + buf++; + } + + LIST_ADDTAIL(&slab->head, &mgr->slabs); + + return PIPE_OK; + +out_err1: + pb_reference(&slab->bo, NULL); +out_err0: + FREE(slab); + return ret; +} + + +static struct pb_buffer * +pb_slab_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_slab_manager *mgr = pb_slab_manager(_mgr); + static struct pb_slab_buffer *buf; + struct pb_slab *slab; + struct list_head *list; + int count = DRI_SLABPOOL_ALLOC_RETRIES; + + /* check size */ + assert(size == mgr->bufSize); + if(size != mgr->bufSize) + return NULL; + + /* check if we can provide the requested alignment */ + assert(pb_check_alignment(desc->alignment, mgr->desc.alignment)); + if(!pb_check_alignment(desc->alignment, mgr->desc.alignment)) + return NULL; + assert(pb_check_alignment(desc->alignment, mgr->bufSize)); + if(!pb_check_alignment(desc->alignment, mgr->bufSize)) + return NULL; + + /* XXX: check for compatible buffer usage too? */ + + _glthread_LOCK_MUTEX(mgr->mutex); + while (mgr->slabs.next == &mgr->slabs && count > 0) { + if (mgr->slabs.next != &mgr->slabs) + break; + + _glthread_UNLOCK_MUTEX(mgr->mutex); + if (count != DRI_SLABPOOL_ALLOC_RETRIES) + util_time_sleep(1); + _glthread_LOCK_MUTEX(mgr->mutex); + (void) pb_slab_create(mgr); + count--; + } + + list = mgr->slabs.next; + if (list == &mgr->slabs) { + _glthread_UNLOCK_MUTEX(mgr->mutex); + return NULL; + } + slab = LIST_ENTRY(struct pb_slab, list, head); + if (--slab->numFree == 0) + LIST_DELINIT(list); + + list = slab->freeBuffers.next; + LIST_DELINIT(list); + + _glthread_UNLOCK_MUTEX(mgr->mutex); + buf = LIST_ENTRY(struct pb_slab_buffer, list, head); + + ++buf->base.base.refcount; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + + return &buf->base; +} + + +static void +pb_slab_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_slab_manager *mgr = pb_slab_manager(_mgr); + + /* TODO: cleanup all allocated buffers */ + FREE(mgr); +} + + +struct pb_manager * +pb_slab_manager_create(struct pb_manager *provider, + size_t bufSize, + size_t slabSize, + const struct pb_desc *desc) +{ + struct pb_slab_manager *mgr; + + mgr = CALLOC_STRUCT(pb_slab_manager); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_slab_manager_destroy; + mgr->base.create_buffer = pb_slab_manager_create_buffer; + + mgr->provider = provider; + mgr->bufSize = bufSize; + mgr->slabSize = slabSize; + mgr->desc = *desc; + + LIST_INITHEAD(&mgr->slabs); + LIST_INITHEAD(&mgr->freeSlabs); + + _glthread_INIT_MUTEX(mgr->mutex); + + return &mgr->base; +} + + +static struct pb_buffer * +pb_slab_range_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + size_t bufSize; + unsigned i; + + bufSize = mgr->minBufSize; + for (i = 0; i < mgr->numBuckets; ++i) { + if(bufSize >= size) + return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc); + bufSize *= 2; + } + + /* Fall back to allocate a buffer object directly from the provider. */ + return mgr->provider->create_buffer(mgr->provider, size, desc); +} + + +static void +pb_slab_range_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + unsigned i; + + for (i = 0; i < mgr->numBuckets; ++i) + mgr->buckets[i]->destroy(mgr->buckets[i]); + FREE(mgr->buckets); + FREE(mgr->bucketSizes); + FREE(mgr); +} + + +struct pb_manager * +pb_slab_range_manager_create(struct pb_manager *provider, + size_t minBufSize, + size_t maxBufSize, + size_t slabSize, + const struct pb_desc *desc) +{ + struct pb_slab_range_manager *mgr; + size_t bufSize; + unsigned i; + + mgr = CALLOC_STRUCT(pb_slab_range_manager); + if (!mgr) + goto out_err0; + + mgr->base.destroy = pb_slab_range_manager_destroy; + mgr->base.create_buffer = pb_slab_range_manager_create_buffer; + + mgr->provider = provider; + mgr->minBufSize = minBufSize; + mgr->maxBufSize = maxBufSize; + + mgr->numBuckets = 1; + bufSize = minBufSize; + while(bufSize < maxBufSize) { + bufSize *= 2; + ++mgr->numBuckets; + } + + mgr->buckets = CALLOC(mgr->numBuckets, sizeof(*mgr->buckets)); + if (!mgr->buckets) + goto out_err1; + + bufSize = minBufSize; + for (i = 0; i < mgr->numBuckets; ++i) { + mgr->buckets[i] = pb_slab_manager_create(provider, bufSize, slabSize, desc); + if(!mgr->buckets[i]) + goto out_err2; + bufSize *= 2; + } + + return &mgr->base; + +out_err2: + for (i = 0; i < mgr->numBuckets; ++i) + if(mgr->buckets[i]) + mgr->buckets[i]->destroy(mgr->buckets[i]); + FREE(mgr->buckets); +out_err1: + FREE(mgr); +out_err0: + return NULL; +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c index eb3359750b..f01e12faa0 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -26,14 +26,29 @@ **************************************************************************/ +#include "pipe/p_debug.h" #include "rtasm_cpu.h" +static boolean rtasm_sse_enabled(void) +{ + static boolean firsttime = 1; + static boolean enabled; + + /* This gets called quite often at the moment: + */ + if (firsttime) { + enabled = !debug_get_bool_option("GALLIUM_NOSSE", FALSE); + firsttime = FALSE; + } + return enabled; +} + int rtasm_cpu_has_sse(void) { /* FIXME: actually detect this at run-time */ -#if defined(__i386__) || defined(__386__) - return 1; +#if defined(__i386__) || defined(__386__) || defined(i386) + return rtasm_sse_enabled(); #else return 0; #endif @@ -42,8 +57,8 @@ int rtasm_cpu_has_sse(void) int rtasm_cpu_has_sse2(void) { /* FIXME: actually detect this at run-time */ -#if defined(__i386__) || defined(__386__) - return 1; +#if defined(__i386__) || defined(__386__) || defined(i386) + return rtasm_sse_enabled(); #else return 0; #endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 4d33950e99..4e036d9032 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -33,15 +33,114 @@ #define DISASSEM 0 #define X86_TWOB 0x0f -static unsigned char *cptr( void (*label)() ) -{ - return (unsigned char *) label; -} + +#define DUMP_SSE 0 + +#if DUMP_SSE + +static void +_print_reg( + struct x86_reg reg ) +{ + if (reg.mod != mod_REG) + debug_printf( "[" ); + + switch( reg.file ) { + case file_REG32: + switch( reg.idx ) { + case reg_AX: debug_printf( "EAX" ); break; + case reg_CX: debug_printf( "ECX" ); break; + case reg_DX: debug_printf( "EDX" ); break; + case reg_BX: debug_printf( "EBX" ); break; + case reg_SP: debug_printf( "ESP" ); break; + case reg_BP: debug_printf( "EBP" ); break; + case reg_SI: debug_printf( "ESI" ); break; + case reg_DI: debug_printf( "EDI" ); break; + } + break; + case file_MMX: + debug_printf( "MMX%u", reg.idx ); + break; + case file_XMM: + debug_printf( "XMM%u", reg.idx ); + break; + case file_x87: + debug_printf( "fp%u", reg.idx ); + break; + } + + if (reg.mod == mod_DISP8 || + reg.mod == mod_DISP32) + debug_printf("+%d", reg.disp); + + if (reg.mod != mod_REG) + debug_printf( "]" ); +} + + +#define DUMP_START() debug_printf( "\n" ) +#define DUMP_END() debug_printf( "\n" ) + +#define DUMP() do { \ + const char *foo = __FUNCTION__; \ + while (*foo && *foo != '_') \ + foo++; \ + if (*foo) \ + foo++; \ + debug_printf( "\n% 15s ", foo ); \ +} while (0) + +#define DUMP_I( I ) do { \ + DUMP(); \ + debug_printf( "%u", I ); \ +} while( 0 ) + +#define DUMP_R( R0 ) do { \ + DUMP(); \ + _print_reg( R0 ); \ +} while( 0 ) + +#define DUMP_RR( R0, R1 ) do { \ + DUMP(); \ + _print_reg( R0 ); \ + debug_printf( ", " ); \ + _print_reg( R1 ); \ +} while( 0 ) + +#define DUMP_RI( R0, I ) do { \ + DUMP(); \ + _print_reg( R0 ); \ + debug_printf( ", %u", I ); \ +} while( 0 ) + +#define DUMP_RRI( R0, R1, I ) do { \ + DUMP(); \ + _print_reg( R0 ); \ + debug_printf( ", " ); \ + _print_reg( R1 ); \ + debug_printf( ", %u", I ); \ +} while( 0 ) + +#else + +#define DUMP_START() +#define DUMP_END() +#define DUMP( ) +#define DUMP_I( I ) +#define DUMP_R( R0 ) +#define DUMP_RR( R0, R1 ) +#define DUMP_RI( R0, I ) +#define DUMP_RRI( R0, R1, I ) + +#endif static void do_realloc( struct x86_function *p ) { - if (p->size == 0) { + if (p->store == p->error_overflow) { + p->csr = p->store; + } + else if (p->size == 0) { p->size = 1024; p->store = rtasm_exec_malloc(p->size); p->csr = p->store; @@ -51,10 +150,22 @@ static void do_realloc( struct x86_function *p ) unsigned char *tmp = p->store; p->size *= 2; p->store = rtasm_exec_malloc(p->size); - memcpy(p->store, tmp, used); - p->csr = p->store + used; + + if (p->store) { + memcpy(p->store, tmp, used); + p->csr = p->store + used; + } + else { + p->csr = p->store; + } + rtasm_exec_free(tmp); } + + if (p->store == NULL) { + p->store = p->csr = p->error_overflow; + p->size = sizeof(p->error_overflow); + } } /* Emit bytes to the instruction stream: @@ -236,9 +347,9 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg ) return x86_make_reg( reg.file, reg.idx ); } -unsigned char *x86_get_label( struct x86_function *p ) +int x86_get_label( struct x86_function *p ) { - return p->csr; + return p->csr - p->store; } @@ -250,16 +361,22 @@ unsigned char *x86_get_label( struct x86_function *p ) void x86_jcc( struct x86_function *p, enum x86_cc cc, - unsigned char *label ) + int label ) { - intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2); + int offset = label - (x86_get_label(p) + 2); + DUMP_I(cc); + if (offset < 0) { + int amt = p->csr - p->store; + assert(amt > -offset); + } + if (offset <= 127 && offset >= -128) { emit_1ub(p, 0x70 + cc); emit_1b(p, (char) offset); } else { - offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6); + offset = label - (x86_get_label(p) + 6); emit_2ub(p, 0x0f, 0x80 + cc); emit_1i(p, offset); } @@ -267,23 +384,27 @@ void x86_jcc( struct x86_function *p, /* Always use a 32bit offset for forward jumps: */ -unsigned char *x86_jcc_forward( struct x86_function *p, - enum x86_cc cc ) +int x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) { + DUMP_I(cc); emit_2ub(p, 0x0f, 0x80 + cc); emit_1i(p, 0); return x86_get_label(p); } -unsigned char *x86_jmp_forward( struct x86_function *p) +int x86_jmp_forward( struct x86_function *p) { + DUMP(); emit_1ub(p, 0xe9); emit_1i(p, 0); return x86_get_label(p); } -unsigned char *x86_call_forward( struct x86_function *p) +int x86_call_forward( struct x86_function *p) { + DUMP(); + emit_1ub(p, 0xe8); emit_1i(p, 0); return x86_get_label(p); @@ -292,34 +413,24 @@ unsigned char *x86_call_forward( struct x86_function *p) /* Fixup offset from forward jump: */ void x86_fixup_fwd_jump( struct x86_function *p, - unsigned char *fixup ) + int fixup ) { - *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup ); + *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; } -void x86_jmp( struct x86_function *p, unsigned char *label) +void x86_jmp( struct x86_function *p, int label) { + DUMP_I( label ); emit_1ub(p, 0xe9); - emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); + emit_1i(p, label - x86_get_label(p) - 4); } -#if 0 -/* This doesn't work once we start reallocating & copying the - * generated code on buffer fills, because the call is relative to the - * current pc. - */ -void x86_call( struct x86_function *p, void (*label)()) -{ - emit_1ub(p, 0xe8); - emit_1i(p, cptr(label) - x86_get_label(p) - 4); -} -#else void x86_call( struct x86_function *p, struct x86_reg reg) { + DUMP_R( reg ); emit_1ub(p, 0xff); - emit_modrm(p, reg, reg); + emit_modrm_noreg(p, 2, reg); } -#endif /* michal: @@ -328,6 +439,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg) */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { + DUMP_RI( dst, imm ); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); @@ -336,14 +448,23 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) void x86_push( struct x86_function *p, struct x86_reg reg ) { - assert(reg.mod == mod_REG); - emit_1ub(p, 0x50 + reg.idx); + DUMP_R( reg ); + if (reg.mod == mod_REG) + emit_1ub(p, 0x50 + reg.idx); + else + { + emit_1ub(p, 0xff); + emit_modrm_noreg(p, 6, reg); + } + + p->stack_offset += 4; } void x86_pop( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x58 + reg.idx); p->stack_offset -= 4; @@ -352,6 +473,7 @@ void x86_pop( struct x86_function *p, void x86_inc( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x40 + reg.idx); } @@ -359,17 +481,27 @@ void x86_inc( struct x86_function *p, void x86_dec( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x48 + reg.idx); } void x86_ret( struct x86_function *p ) { + DUMP(); + assert(p->stack_offset == 0); emit_1ub(p, 0xc3); } +void x86_retw( struct x86_function *p, unsigned short imm ) +{ + DUMP(); + emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); +} + void x86_sahf( struct x86_function *p ) { + DUMP(); emit_1ub(p, 0x9e); } @@ -377,6 +509,7 @@ void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_op_modrm( p, 0x8b, 0x89, dst, src ); } @@ -384,6 +517,7 @@ void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_op_modrm( p, 0x33, 0x31, dst, src ); } @@ -391,6 +525,7 @@ void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_op_modrm( p, 0x3b, 0x39, dst, src ); } @@ -398,6 +533,7 @@ void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_1ub(p, 0x8d); emit_modrm( p, dst, src ); } @@ -406,6 +542,7 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_1ub(p, 0x85); emit_modrm( p, dst, src ); } @@ -414,20 +551,36 @@ void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_op_modrm(p, 0x03, 0x01, dst, src ); } +/* Calculate EAX * src, results in EDX:EAX. + */ void x86_mul( struct x86_function *p, struct x86_reg src ) { - assert (src.file == file_REG32 && src.mod == mod_REG); - emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); + DUMP_R( src ); + emit_1ub(p, 0xf7); + emit_modrm_noreg(p, 4, src ); +} + + +void x86_imul( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0xAF); + emit_modrm(p, dst, src); } + void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_op_modrm(p, 0x2b, 0x29, dst, src ); } @@ -435,6 +588,7 @@ void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_op_modrm( p, 0x0b, 0x09, dst, src ); } @@ -442,6 +596,7 @@ void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_op_modrm( p, 0x23, 0x21, dst, src ); } @@ -456,6 +611,7 @@ void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, 0xF3, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -464,6 +620,7 @@ void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x28, 0x29, dst, src ); } @@ -472,6 +629,7 @@ void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -480,6 +638,7 @@ void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ @@ -489,6 +648,7 @@ void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ @@ -498,6 +658,7 @@ void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -506,6 +667,7 @@ void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -514,6 +676,7 @@ void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5E); emit_modrm( p, dst, src ); } @@ -522,6 +685,7 @@ void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5D); emit_modrm( p, dst, src ); } @@ -530,6 +694,7 @@ void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5C); emit_modrm( p, dst, src ); } @@ -538,6 +703,7 @@ void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -546,6 +712,7 @@ void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -554,6 +721,7 @@ void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -562,6 +730,7 @@ void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -570,6 +739,7 @@ void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x55); emit_modrm( p, dst, src ); } @@ -578,6 +748,7 @@ void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x54); emit_modrm( p, dst, src ); } @@ -586,6 +757,7 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x52); emit_modrm( p, dst, src ); } @@ -594,6 +766,7 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x52); emit_modrm( p, dst, src ); @@ -603,6 +776,7 @@ void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x12); emit_modrm( p, dst, src ); @@ -612,6 +786,7 @@ void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x16); emit_modrm( p, dst, src ); @@ -621,6 +796,7 @@ void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x56); emit_modrm( p, dst, src ); } @@ -629,6 +805,7 @@ void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x57); emit_modrm( p, dst, src ); } @@ -637,6 +814,7 @@ void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); assert(dst.file == file_MMX && (src.file == file_XMM || src.mod != mod_REG)); @@ -646,36 +824,62 @@ void sse_cvtps2pi( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse2_cvtdq2ps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x5b); + emit_modrm( p, dst, src ); +} + /* Shufps can also be used to implement a reduced swizzle when dest == * arg0. */ void sse_shufps( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, + struct x86_reg dst, + struct x86_reg src, unsigned char shuf) { + DUMP_RRI( dst, src, shuf ); emit_2ub(p, X86_TWOB, 0xC6); - emit_modrm(p, dest, arg0); + emit_modrm(p, dst, src); emit_1ub(p, shuf); } +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x15 ); + emit_modrm( p, dst, src ); +} + +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x14 ); + emit_modrm( p, dst, src ); +} + void sse_cmpps( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, + struct x86_reg dst, + struct x86_reg src, unsigned char cc) { + DUMP_RRI( dst, src, cc ); emit_2ub(p, X86_TWOB, 0xC2); - emit_modrm(p, dest, arg0); + emit_modrm(p, dst, src); emit_1ub(p, cc); } void sse_pmovmskb( struct x86_function *p, - struct x86_reg dest, + struct x86_reg dst, struct x86_reg src) { - emit_3ub(p, 0x66, X86_TWOB, 0xD7); - emit_modrm(p, dest, src); + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dst, src); } /*********************************************************************** @@ -686,12 +890,13 @@ void sse_pmovmskb( struct x86_function *p, * Perform a reduced swizzle: */ void sse2_pshufd( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, + struct x86_reg dst, + struct x86_reg src, unsigned char shuf) { + DUMP_RRI( dst, src, shuf ); emit_3ub(p, 0x66, X86_TWOB, 0x70); - emit_modrm(p, dest, arg0); + emit_modrm(p, dst, src); emit_1ub(p, shuf); } @@ -699,6 +904,7 @@ void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); emit_modrm( p, dst, src ); } @@ -707,6 +913,7 @@ void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x5B); emit_modrm( p, dst, src ); } @@ -715,6 +922,7 @@ void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x6B); emit_modrm( p, dst, src ); } @@ -723,6 +931,7 @@ void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x63); emit_modrm( p, dst, src ); } @@ -731,14 +940,26 @@ void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x67); emit_modrm( p, dst, src ); } +void sse2_punpcklbw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x60); + emit_modrm( p, dst, src ); +} + + void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -747,6 +968,7 @@ void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -755,6 +977,7 @@ void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); emit_2ub(p, 0x66, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src ); } @@ -767,30 +990,35 @@ void sse2_movd( struct x86_function *p, */ void x87_fist( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 2, dst); } void x87_fistp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 3, dst); } void x87_fild( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( arg ); emit_1ub(p, 0xdf); emit_modrm_noreg(p, 0, arg); } void x87_fldz( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xee); } void x87_fldcw( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( arg ); assert(arg.file == file_REG32); assert(arg.mod != mod_REG); emit_1ub(p, 0xd9); @@ -799,26 +1027,31 @@ void x87_fldcw( struct x86_function *p, struct x86_reg arg ) void x87_fld1( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xe8); } void x87_fldl2e( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xea); } void x87_fldln2( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xed); } void x87_fwait( struct x86_function *p ) { + DUMP(); emit_1ub(p, 0x9b); } void x87_fnclex( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xdb, 0xe2); } @@ -855,49 +1088,55 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86 assert(0); } -void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xc8, 0xdc, 0xc8, 4); } -void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xe0, 0xdc, 0xe8, 4); } -void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xe8, 0xdc, 0xe0, 5); } -void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xc0, 0xdc, 0xc0, 0); } -void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xf0, 0xdc, 0xf8, 6); } -void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xf8, 0xdc, 0xf0, 7); @@ -905,6 +1144,7 @@ void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) void x87_fmulp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc8+dst.idx); @@ -912,6 +1152,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) void x87_fsubp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe8+dst.idx); @@ -919,6 +1160,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe0+dst.idx); @@ -926,6 +1168,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) void x87_faddp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc0+dst.idx); @@ -933,6 +1176,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) void x87_fdivp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf8+dst.idx); @@ -940,6 +1184,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst ) void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf0+dst.idx); @@ -947,70 +1192,83 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) void x87_fucom( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe0+arg.idx); } void x87_fucomp( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe8+arg.idx); } void x87_fucompp( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xda, 0xe9); } void x87_fxch( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xd9, 0xc8+arg.idx); } void x87_fabs( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xe1); } void x87_fchs( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xe0); } void x87_fcos( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xff); } void x87_fprndint( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xfc); } void x87_fscale( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xfd); } void x87_fsin( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xfe); } void x87_fsincos( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xfb); } void x87_fsqrt( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xfa); } void x87_fxtract( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xf4); } @@ -1020,6 +1278,7 @@ void x87_fxtract( struct x86_function *p ) */ void x87_f2xm1( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xf0); } @@ -1028,6 +1287,7 @@ void x87_f2xm1( struct x86_function *p ) */ void x87_fyl2x( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xf1); } @@ -1038,12 +1298,14 @@ void x87_fyl2x( struct x86_function *p ) */ void x87_fyl2xp1( struct x86_function *p ) { + DUMP(); emit_2ub(p, 0xd9, 0xf9); } void x87_fld( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( arg ); if (arg.file == file_x87) emit_2ub(p, 0xd9, 0xc0 + arg.idx); else { @@ -1054,6 +1316,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) void x87_fst( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd0 + dst.idx); else { @@ -1064,6 +1327,7 @@ void x87_fst( struct x86_function *p, struct x86_reg dst ) void x87_fstp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd8 + dst.idx); else { @@ -1074,6 +1338,7 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) void x87_fcom( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd0 + dst.idx); else { @@ -1084,6 +1349,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) void x87_fcomp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd8 + dst.idx); else { @@ -1095,6 +1361,7 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( dst ); assert(dst.file == file_REG32); if (dst.idx == reg_AX && @@ -1115,6 +1382,7 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) void mmx_emms( struct x86_function *p ) { + DUMP(); assert(p->need_emms); emit_2ub(p, 0x0f, 0x77); p->need_emms = 0; @@ -1124,6 +1392,7 @@ void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1137,6 +1406,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1150,6 +1420,7 @@ void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src ); @@ -1159,6 +1430,7 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6f, 0x7f, dst, src ); @@ -1186,18 +1458,25 @@ void x86_init_func( struct x86_function *p ) p->size = 0; p->store = NULL; p->csr = p->store; + DUMP_START(); } void x86_init_func_size( struct x86_function *p, unsigned code_size ) { p->size = code_size; p->store = rtasm_exec_malloc(code_size); + if (p->store == NULL) { + p->store = p->error_overflow; + } p->csr = p->store; + DUMP_START(); } void x86_release_func( struct x86_function *p ) { - rtasm_exec_free(p->store); + if (p->store && p->store != p->error_overflow) + rtasm_exec_free(p->store); + p->store = NULL; p->csr = NULL; p->size = 0; @@ -1206,9 +1485,14 @@ void x86_release_func( struct x86_function *p ) void (*x86_get_func( struct x86_function *p ))(void) { + DUMP_END(); if (DISASSEM && p->store) debug_printf("disassemble %p %p\n", p->store, p->csr); - return (void (*)(void)) p->store; + + if (p->store == p->error_overflow) + return (void (*)(void)) NULL; + else + return (void (*)(void)) p->store; } #else diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 606b41eb35..eacaeeaf6f 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -43,6 +43,7 @@ struct x86_function { unsigned char *csr; unsigned stack_offset; int need_emms; + unsigned char error_overflow[4]; const char *fn; }; @@ -123,23 +124,23 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg ); /* Labels, jumps and fixup: */ -unsigned char *x86_get_label( struct x86_function *p ); +int x86_get_label( struct x86_function *p ); void x86_jcc( struct x86_function *p, enum x86_cc cc, - unsigned char *label ); + int label ); -unsigned char *x86_jcc_forward( struct x86_function *p, +int x86_jcc_forward( struct x86_function *p, enum x86_cc cc ); -unsigned char *x86_jmp_forward( struct x86_function *p); +int x86_jmp_forward( struct x86_function *p); -unsigned char *x86_call_forward( struct x86_function *p); +int x86_call_forward( struct x86_function *p); void x86_fixup_fwd_jump( struct x86_function *p, - unsigned char *fixup ); + int fixup ); -void x86_jmp( struct x86_function *p, unsigned char *label ); +void x86_jmp( struct x86_function *p, int label ); /* void x86_call( struct x86_function *p, void (*label)() ); */ void x86_call( struct x86_function *p, struct x86_reg reg); @@ -165,6 +166,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg sr void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); @@ -201,7 +203,10 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, unsigned char shuf ); +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); +void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); @@ -211,10 +216,12 @@ void x86_inc( struct x86_function *p, struct x86_reg reg ); void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_mul( struct x86_function *p, struct x86_reg src ); +void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_pop( struct x86_function *p, struct x86_reg reg ); void x86_push( struct x86_function *p, struct x86_reg reg ); void x86_ret( struct x86_function *p ); +void x86_retw( struct x86_function *p, unsigned short imm ); void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c index 97ee5882a1..5e4126e014 100644 --- a/src/gallium/auxiliary/sct/sct.c +++ b/src/gallium/auxiliary/sct/sct.c @@ -209,6 +209,7 @@ remove_context_from_surface(struct sct_surface *si, } else { prev = curr; + next = curr->next; } } } diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile new file mode 100644 index 0000000000..451911a354 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/exec/Makefile @@ -0,0 +1,2 @@ +default: + cd .. ; make diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 78e7dec569..826b432f09 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -88,6 +88,10 @@ #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I +#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C +#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I +#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C #define TEMP_R0 TGSI_EXEC_TEMP_R0 #define FOR_EACH_CHANNEL(CHAN)\ @@ -262,6 +266,8 @@ tgsi_exec_machine_init( mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; + mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; + mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; } } @@ -287,10 +293,10 @@ micro_abs( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) fabs( (double) src->f[0] ); - dst->f[1] = (float) fabs( (double) src->f[1] ); - dst->f[2] = (float) fabs( (double) src->f[2] ); - dst->f[3] = (float) fabs( (double) src->f[3] ); + dst->f[0] = fabsf( src->f[0] ); + dst->f[1] = fabsf( src->f[1] ); + dst->f[2] = fabsf( src->f[2] ); + dst->f[3] = fabsf( src->f[3] ); } static void @@ -334,10 +340,10 @@ micro_ceil( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) ceil( (double) src->f[0] ); - dst->f[1] = (float) ceil( (double) src->f[1] ); - dst->f[2] = (float) ceil( (double) src->f[2] ); - dst->f[3] = (float) ceil( (double) src->f[3] ); + dst->f[0] = ceilf( src->f[0] ); + dst->f[1] = ceilf( src->f[1] ); + dst->f[2] = ceilf( src->f[2] ); + dst->f[3] = ceilf( src->f[3] ); } static void @@ -345,10 +351,10 @@ micro_cos( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) cos( (double) src->f[0] ); - dst->f[1] = (float) cos( (double) src->f[1] ); - dst->f[2] = (float) cos( (double) src->f[2] ); - dst->f[3] = (float) cos( (double) src->f[3] ); + dst->f[0] = cosf( src->f[0] ); + dst->f[1] = cosf( src->f[1] ); + dst->f[2] = cosf( src->f[2] ); + dst->f[3] = cosf( src->f[3] ); } static void @@ -430,10 +436,10 @@ micro_exp2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { - dst->f[0] = (float) pow( 2.0, (double) src->f[0] ); - dst->f[1] = (float) pow( 2.0, (double) src->f[1] ); - dst->f[2] = (float) pow( 2.0, (double) src->f[2] ); - dst->f[3] = (float) pow( 2.0, (double) src->f[3] ); + dst->f[0] = powf( 2.0f, src->f[0] ); + dst->f[1] = powf( 2.0f, src->f[1] ); + dst->f[2] = powf( 2.0f, src->f[2] ); + dst->f[3] = powf( 2.0f, src->f[3] ); } static void @@ -463,10 +469,10 @@ micro_flr( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) floor( (double) src->f[0] ); - dst->f[1] = (float) floor( (double) src->f[1] ); - dst->f[2] = (float) floor( (double) src->f[2] ); - dst->f[3] = (float) floor( (double) src->f[3] ); + dst->f[0] = floorf( src->f[0] ); + dst->f[1] = floorf( src->f[1] ); + dst->f[2] = floorf( src->f[2] ); + dst->f[3] = floorf( src->f[3] ); } static void @@ -474,10 +480,10 @@ micro_frc( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] ); - dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] ); - dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] ); - dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] ); + dst->f[0] = src->f[0] - floorf( src->f[0] ); + dst->f[1] = src->f[1] - floorf( src->f[1] ); + dst->f[2] = src->f[2] - floorf( src->f[2] ); + dst->f[3] = src->f[3] - floorf( src->f[3] ); } static void @@ -510,10 +516,24 @@ micro_lg2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f; - dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f; - dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f; - dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f; + dst->f[0] = logf( src->f[0] ) * 1.442695f; + dst->f[1] = logf( src->f[1] ) * 1.442695f; + dst->f[2] = logf( src->f[2] ) * 1.442695f; + dst->f[3] = logf( src->f[3] ) * 1.442695f; +} + +static void +micro_le( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; } static void @@ -764,10 +784,10 @@ micro_pow( const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { - dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] ); - dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] ); - dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] ); - dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] ); + dst->f[0] = powf( src0->f[0], src1->f[0] ); + dst->f[1] = powf( src0->f[1], src1->f[1] ); + dst->f[2] = powf( src0->f[2], src1->f[2] ); + dst->f[3] = powf( src0->f[3], src1->f[3] ); } static void @@ -775,10 +795,10 @@ micro_rnd( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) ); - dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) ); - dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) ); - dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) ); + dst->f[0] = floorf( src->f[0] + 0.5f ); + dst->f[1] = floorf( src->f[1] + 0.5f ); + dst->f[2] = floorf( src->f[2] + 0.5f ); + dst->f[3] = floorf( src->f[3] + 0.5f ); } static void @@ -833,20 +853,20 @@ micro_sin( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) sin( (double) src->f[0] ); - dst->f[1] = (float) sin( (double) src->f[1] ); - dst->f[2] = (float) sin( (double) src->f[2] ); - dst->f[3] = (float) sin( (double) src->f[3] ); + dst->f[0] = sinf( src->f[0] ); + dst->f[1] = sinf( src->f[1] ); + dst->f[2] = sinf( src->f[2] ); + dst->f[3] = sinf( src->f[3] ); } static void micro_sqrt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) sqrt( (double) src->f[0] ); - dst->f[1] = (float) sqrt( (double) src->f[1] ); - dst->f[2] = (float) sqrt( (double) src->f[2] ); - dst->f[3] = (float) sqrt( (double) src->f[3] ); + dst->f[0] = sqrtf( src->f[0] ); + dst->f[1] = sqrtf( src->f[1] ); + dst->f[2] = sqrtf( src->f[2] ); + dst->f[3] = sqrtf( src->f[3] ); } static void @@ -1516,41 +1536,44 @@ exec_instruction( break; case TGSI_OPCODE_EXP: - debug_printf("TGSI: EXP opcode not implemented\n"); - /* from ARB_v_p: - tmp = ScalarLoad(op0); - result.x = 2^floor(tmp); - result.y = tmp - floor(tmp); - result.z = RoughApprox2ToX(tmp); - result.w = 1.0; - */ -#if 0 - /* something like this: */ FETCH( &r[0], 0, CHAN_X ); - micro_exp2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ + STORE( &r[2], 0, CHAN_X ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ + STORE( &r[2], 0, CHAN_Y ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ + STORE( &r[2], 0, CHAN_Z ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } -#endif break; case TGSI_OPCODE_LOG: - debug_printf("TGSI: LOG opcode not implemented\n"); - /* from ARB_v_p: - tmp = fabs(ScalarLoad(op0)); - result.x = floor(log2(tmp)); - result.y = tmp / 2^(floor(log2(tmp))); - result.z = RoughApproxLog2(tmp); - result.w = 1.0; - */ -#if 0 - /* something like this: */ FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ + micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ + micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[0], 0, CHAN_X ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ + micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ + STORE( &r[0], 0, CHAN_Y ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[1], 0, CHAN_Z ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } -#endif break; case TGSI_OPCODE_MUL: @@ -1975,7 +1998,7 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); STORE( &r[0], 0, chan_index ); } break; @@ -1992,7 +2015,7 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); STORE( &r[0], 0, chan_index ); } break; diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h index 45c49dd007..19bd78df3d 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h @@ -133,9 +133,15 @@ struct tgsi_exec_labels #define TGSI_EXEC_TEMP_PRIMITIVE_I 34 #define TGSI_EXEC_TEMP_PRIMITIVE_C 2 -#define TGSI_EXEC_TEMP_R0 35 +#define TGSI_EXEC_TEMP_THREE_I 34 +#define TGSI_EXEC_TEMP_THREE_C 3 -#define TGSI_EXEC_NUM_TEMPS (32 + 4) +#define TGSI_EXEC_TEMP_HALF_I 35 +#define TGSI_EXEC_TEMP_HALF_C 0 + +#define TGSI_EXEC_TEMP_R0 36 + +#define TGSI_EXEC_NUM_TEMPS (32 + 5) #define TGSI_EXEC_NUM_ADDRS 1 #define TGSI_EXEC_NUM_IMMEDIATES 256 @@ -166,7 +172,7 @@ struct tgsi_exec_machine float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; unsigned ImmLimit; - float (*Consts)[4]; + const float (*Consts)[4]; struct tgsi_exec_vector *Inputs; struct tgsi_exec_vector *Outputs; const struct tgsi_token *Tokens; diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 4e80597b3f..8018bd7fa4 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -34,115 +34,14 @@ #include "rtasm/rtasm_x86sse.h" -#if defined(__i386__) || defined(__386__) +#ifdef PIPE_ARCH_X86 -#define DUMP_SSE 0 - -#if DUMP_SSE - -static void -_print_reg( - struct x86_reg reg ) -{ - if (reg.mod != mod_REG) - debug_printf( "[" ); - - switch( reg.file ) { - case file_REG32: - switch( reg.idx ) { - case reg_AX: - debug_printf( "EAX" ); - break; - case reg_CX: - debug_printf( "ECX" ); - break; - case reg_DX: - debug_printf( "EDX" ); - break; - case reg_BX: - debug_printf( "EBX" ); - break; - case reg_SP: - debug_printf( "ESP" ); - break; - case reg_BP: - debug_printf( "EBP" ); - break; - case reg_SI: - debug_printf( "ESI" ); - break; - case reg_DI: - debug_printf( "EDI" ); - break; - } - break; - case file_MMX: - assert( 0 ); - break; - case file_XMM: - debug_printf( "XMM%u", reg.idx ); - break; - case file_x87: - assert( 0 ); - break; - } - - if (reg.mod == mod_DISP8 || - reg.mod == mod_DISP32) - debug_printf("+%d", reg.disp); - - if (reg.mod != mod_REG) - debug_printf( "]" ); -} - -static void -_fill( - const char *op ) -{ - unsigned count = 10 - strlen( op ); - - while( count-- ) { - debug_printf( " " ); - } -} - -#define DUMP_START() debug_printf( "\nsse-dump start ----------------" ) -#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" ) -#define DUMP( OP ) debug_printf( "\n%s", OP ) -#define DUMP_I( OP, I ) do {\ - debug_printf( "\n%s", OP );\ - _fill( OP );\ - debug_printf( "%u", I ); } while( 0 ) -#define DUMP_R( OP, R0 ) do {\ - debug_printf( "\n%s", OP );\ - _fill( OP );\ - _print_reg( R0 ); } while( 0 ) -#define DUMP_RR( OP, R0, R1 ) do {\ - debug_printf( "\n%s", OP );\ - _fill( OP );\ - _print_reg( R0 );\ - debug_printf( ", " );\ - _print_reg( R1 ); } while( 0 ) -#define DUMP_RRI( OP, R0, R1, I ) do {\ - debug_printf( "\n%s", OP );\ - _fill( OP );\ - _print_reg( R0 );\ - debug_printf( ", " );\ - _print_reg( R1 );\ - debug_printf( ", " );\ - debug_printf( "%u", I ); } while( 0 ) - -#else - -#define DUMP_START() -#define DUMP_END() -#define DUMP( OP ) -#define DUMP_I( OP, I ) -#define DUMP_R( OP, R0 ) -#define DUMP_RR( OP, R0, R1 ) -#define DUMP_RRI( OP, R0, R1, I ) +/* for 1/sqrt() + * + * This costs about 100fps (close to 10%) in gears: + */ +#define HIGH_PRECISION 1 -#endif #define FOR_EACH_CHANNEL( CHAN )\ for( CHAN = 0; CHAN < 4; CHAN++ ) @@ -208,15 +107,9 @@ get_output_base( void ) static struct x86_reg get_temp_base( void ) { -#ifdef WIN32 return x86_make_reg( file_REG32, reg_BX ); -#else - return x86_make_reg( - file_REG32, - reg_SI ); -#endif } static struct x86_reg @@ -225,17 +118,28 @@ get_coef_base( void ) return get_output_base(); } +static struct x86_reg +get_immediate_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + + /** * Data access helpers. */ + static struct x86_reg -get_argument( - unsigned index ) +get_immediate( + unsigned vec, + unsigned chan ) { return x86_make_disp( - x86_make_reg( file_REG32, reg_SP ), - (index + 1) * 4 ); + get_immediate_base(), + (vec * 4 + chan) * 4 ); } static struct x86_reg @@ -289,200 +193,6 @@ get_coef( ((vec * 3 + member) * 4 + chan) * 4 ); } -/** - * X86 rtasm wrappers. - */ - -static void -emit_addps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "ADDPS", dst, src ); - sse_addps( func, dst, src ); -} - -static void -emit_andnps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "ANDNPS", dst, src ); - sse_andnps( func, dst, src ); -} - -static void -emit_andps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "ANDPS", dst, src ); - sse_andps( func, dst, src ); -} - -static void -emit_call( - struct x86_function *func, - void (* addr)() ) -{ - struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - - DUMP_I( "CALL", addr ); - x86_mov_reg_imm( func, ecx, (unsigned long) addr ); - x86_call( func, ecx ); -} - -static void -emit_cmpps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src, - enum sse_cc cc ) -{ - DUMP_RRI( "CMPPS", dst, src, cc ); - sse_cmpps( func, dst, src, cc ); -} - -static void -emit_cvttps2dq( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "CVTTPS2DQ", dst, src ); - sse2_cvttps2dq( func, dst, src ); -} - -static void -emit_maxps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MAXPS", dst, src ); - sse_maxps( func, dst, src ); -} - -static void -emit_minps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MINPS", dst, src ); - sse_minps( func, dst, src ); -} - -static void -emit_mov( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MOV", dst, src ); - x86_mov( func, dst, src ); -} - -static void -emit_movaps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MOVAPS", dst, src ); - sse_movaps( func, dst, src ); -} - -static void -emit_movss( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MOVSS", dst, src ); - sse_movss( func, dst, src ); -} - -static void -emit_movups( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MOVUPS", dst, src ); - sse_movups( func, dst, src ); -} - -static void -emit_mulps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MULPS", dst, src ); - sse_mulps( func, dst, src ); -} - -static void -emit_or( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "OR", dst, src ); - x86_or( func, dst, src ); -} - -static void -emit_orps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "ORPS", dst, src ); - sse_orps( func, dst, src ); -} - -static void -emit_pmovmskb( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "PMOVMSKB", dst, src ); - sse_pmovmskb( func, dst, src ); -} - -static void -emit_pop( - struct x86_function *func, - struct x86_reg dst ) -{ - DUMP_R( "POP", dst ); - x86_pop( func, dst ); -} - -static void -emit_push( - struct x86_function *func, - struct x86_reg dst ) -{ - DUMP_R( "PUSH", dst ); - x86_push( func, dst ); -} - -static void -emit_rcpps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "RCPPS", dst, src ); - sse2_rcpps( func, dst, src ); -} #ifdef WIN32 static void @@ -490,7 +200,6 @@ emit_retw( struct x86_function *func, unsigned size ) { - DUMP_I( "RET", size ); x86_retw( func, size ); } #else @@ -498,56 +207,21 @@ static void emit_ret( struct x86_function *func ) { - DUMP( "RET" ); x86_ret( func ); } #endif -static void -emit_rsqrtps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "RSQRTPS", dst, src ); - sse_rsqrtps( func, dst, src ); -} - -static void -emit_shufps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src, - unsigned char shuf ) -{ - DUMP_RRI( "SHUFPS", dst, src, shuf ); - sse_shufps( func, dst, src, shuf ); -} - -static void -emit_subps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "SUBPS", dst, src ); - sse_subps( func, dst, src ); -} - -static void -emit_xorps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "XORPS", dst, src ); - sse_xorps( func, dst, src ); -} /** * Data fetch helpers. */ +/** + * Copy a shader constant to xmm register + * \param xmm the destination xmm register + * \param vec the src const buffer index + * \param chan src channel to fetch (X, Y, Z or W) + */ static void emit_const( struct x86_function *func, @@ -555,11 +229,11 @@ emit_const( unsigned vec, unsigned chan ) { - emit_movss( + sse_movss( func, make_xmm( xmm ), get_const( vec, chan ) ); - emit_shufps( + sse_shufps( func, make_xmm( xmm ), make_xmm( xmm ), @@ -567,18 +241,49 @@ emit_const( } static void +emit_immediate( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_immediate( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + + +/** + * Copy a shader input to xmm register + * \param xmm the destination xmm register + * \param vec the src input attrib + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void emit_inputf( struct x86_function *func, unsigned xmm, unsigned vec, unsigned chan ) { - emit_movups( + sse_movups( func, make_xmm( xmm ), get_input( vec, chan ) ); } +/** + * Store an xmm register to a shader output + * \param xmm the source xmm register + * \param vec the dest output attrib + * \param chan src dest channel to store (X, Y, Z or W) + */ static void emit_output( struct x86_function *func, @@ -586,12 +291,18 @@ emit_output( unsigned vec, unsigned chan ) { - emit_movups( + sse_movups( func, get_output( vec, chan ), make_xmm( xmm ) ); } +/** + * Copy a shader temporary to xmm register + * \param xmm the destination xmm register + * \param vec the src temp register + * \param chan src channel to fetch (X, Y, Z or W) + */ static void emit_tempf( struct x86_function *func, @@ -599,12 +310,19 @@ emit_tempf( unsigned vec, unsigned chan ) { - emit_movaps( + sse_movaps( func, make_xmm( xmm ), get_temp( vec, chan ) ); } +/** + * Load an xmm register with an input attrib coefficient (a0, dadx or dady) + * \param xmm the destination xmm register + * \param vec the src input/attribute coefficient index + * \param chan src channel to fetch (X, Y, Z or W) + * \param member 0=a0, 1=dadx, 2=dady + */ static void emit_coef( struct x86_function *func, @@ -613,11 +331,11 @@ emit_coef( unsigned chan, unsigned member ) { - emit_movss( + sse_movss( func, make_xmm( xmm ), get_coef( vec, chan, member ) ); - emit_shufps( + sse_shufps( func, make_xmm( xmm ), make_xmm( xmm ), @@ -635,7 +353,7 @@ emit_inputs( unsigned vec, unsigned chan ) { - emit_movups( + sse_movups( func, get_input( vec, chan ), make_xmm( xmm ) ); @@ -648,7 +366,7 @@ emit_temps( unsigned vec, unsigned chan ) { - emit_movaps( + sse_movaps( func, get_temp( vec, chan ), make_xmm( xmm ) ); @@ -725,41 +443,32 @@ static void emit_push_gp( struct x86_function *func ) { - emit_push( + x86_push( func, - get_const_base() ); - emit_push( + x86_make_reg( file_REG32, reg_AX) ); + x86_push( func, - get_input_base() ); - emit_push( + x86_make_reg( file_REG32, reg_CX) ); + x86_push( func, - get_output_base() ); - - /* It is important on non-win32 platforms that temp base is pushed last. - */ - emit_push( - func, - get_temp_base() ); + x86_make_reg( file_REG32, reg_DX) ); } static void -emit_pop_gp( +x86_pop_gp( struct x86_function *func ) { /* Restore GP registers in a reverse order. */ - emit_pop( - func, - get_temp_base() ); - emit_pop( + x86_pop( func, - get_output_base() ); - emit_pop( + x86_make_reg( file_REG32, reg_DX) ); + x86_pop( func, - get_input_base() ); - emit_pop( + x86_make_reg( file_REG32, reg_CX) ); + x86_pop( func, - get_const_base() ); + x86_make_reg( file_REG32, reg_AX) ); } static void @@ -768,7 +477,7 @@ emit_func_call_dst( unsigned xmm_dst, void (*code)() ) { - emit_movaps( + sse_movaps( func, get_temp( TEMP_R0, 0 ), make_xmm( xmm_dst ) ); @@ -776,20 +485,27 @@ emit_func_call_dst( emit_push_gp( func ); -#ifdef WIN32 - emit_push( - func, - get_temp( TEMP_R0, 0 ) ); + { + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); +#ifndef WIN32 + x86_pop(func, ecx ); #endif + } - emit_call( - func, - code ); - emit_pop_gp( + x86_pop_gp( func ); - emit_movaps( + sse_movaps( func, make_xmm( xmm_dst ), get_temp( TEMP_R0, 0 ) ); @@ -802,7 +518,7 @@ emit_func_call_dst_src( unsigned xmm_src, void (*code)() ) { - emit_movaps( + sse_movaps( func, get_temp( TEMP_R0, 1 ), make_xmm( xmm_src ) ); @@ -822,7 +538,7 @@ emit_abs( struct x86_function *func, unsigned xmm ) { - emit_andps( + sse_andps( func, make_xmm( xmm ), get_temp( @@ -836,7 +552,7 @@ emit_add( unsigned xmm_dst, unsigned xmm_src ) { - emit_addps( + sse_addps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -846,18 +562,12 @@ static void XSTDCALL cos4f( float *store ) { -#ifdef WIN32 - store[0] = (float) cos( (double) store[0] ); - store[1] = (float) cos( (double) store[1] ); - store[2] = (float) cos( (double) store[2] ); - store[3] = (float) cos( (double) store[3] ); -#else - const unsigned X = TEMP_R0 * 16; + const unsigned X = 0; + store[X + 0] = cosf( store[X + 0] ); store[X + 1] = cosf( store[X + 1] ); store[X + 2] = cosf( store[X + 2] ); store[X + 3] = cosf( store[X + 3] ); -#endif } static void @@ -875,18 +585,12 @@ static void XSTDCALL ex24f( float *store ) { -#ifdef WIN32 - store[0] = (float) pow( 2.0, (double) store[0] ); - store[1] = (float) pow( 2.0, (double) store[1] ); - store[2] = (float) pow( 2.0, (double) store[2] ); - store[3] = (float) pow( 2.0, (double) store[3] ); -#else - const unsigned X = TEMP_R0 * 16; + const unsigned X = 0; + store[X + 0] = powf( 2.0f, store[X + 0] ); store[X + 1] = powf( 2.0f, store[X + 1] ); store[X + 2] = powf( 2.0f, store[X + 2] ); store[X + 3] = powf( 2.0f, store[X + 3] ); -#endif } static void @@ -905,7 +609,7 @@ emit_f2it( struct x86_function *func, unsigned xmm ) { - emit_cvttps2dq( + sse2_cvttps2dq( func, make_xmm( xmm ), make_xmm( xmm ) ); @@ -915,15 +619,12 @@ static void XSTDCALL flr4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif - store[X + 0] = (float) floor( (double) store[X + 0] ); - store[X + 1] = (float) floor( (double) store[X + 1] ); - store[X + 2] = (float) floor( (double) store[X + 2] ); - store[X + 3] = (float) floor( (double) store[X + 3] ); + + store[X + 0] = floorf( store[X + 0] ); + store[X + 1] = floorf( store[X + 1] ); + store[X + 2] = floorf( store[X + 2] ); + store[X + 3] = floorf( store[X + 3] ); } static void @@ -941,15 +642,12 @@ static void XSTDCALL frc4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif - store[X + 0] -= (float) floor( (double) store[X + 0] ); - store[X + 1] -= (float) floor( (double) store[X + 1] ); - store[X + 2] -= (float) floor( (double) store[X + 2] ); - store[X + 3] -= (float) floor( (double) store[X + 3] ); + + store[X + 0] -= floorf( store[X + 0] ); + store[X + 1] -= floorf( store[X + 1] ); + store[X + 2] -= floorf( store[X + 2] ); + store[X + 3] -= floorf( store[X + 3] ); } static void @@ -967,11 +665,8 @@ static void XSTDCALL lg24f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = LOG2( store[X + 0] ); store[X + 1] = LOG2( store[X + 1] ); store[X + 2] = LOG2( store[X + 2] ); @@ -995,7 +690,7 @@ emit_MOV( unsigned xmm_dst, unsigned xmm_src ) { - emit_movups( + sse_movups( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -1006,7 +701,7 @@ emit_mul (struct x86_function *func, unsigned xmm_dst, unsigned xmm_src) { - emit_mulps( + sse_mulps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -1017,7 +712,7 @@ emit_neg( struct x86_function *func, unsigned xmm ) { - emit_xorps( + sse_xorps( func, make_xmm( xmm ), get_temp( @@ -1029,18 +724,12 @@ static void XSTDCALL pow4f( float *store ) { -#ifdef WIN32 - store[0] = (float) pow( (double) store[0], (double) store[4] ); - store[1] = (float) pow( (double) store[1], (double) store[5] ); - store[2] = (float) pow( (double) store[2], (double) store[6] ); - store[3] = (float) pow( (double) store[3], (double) store[7] ); -#else - const unsigned X = TEMP_R0 * 16; + const unsigned X = 0; + store[X + 0] = powf( store[X + 0], store[X + 4] ); store[X + 1] = powf( store[X + 1], store[X + 5] ); store[X + 2] = powf( store[X + 2], store[X + 6] ); store[X + 3] = powf( store[X + 3], store[X + 7] ); -#endif } static void @@ -1062,7 +751,11 @@ emit_rcp ( unsigned xmm_dst, unsigned xmm_src ) { - emit_rcpps( + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. Need to either emit a proper divide or use the + * iterative technique described below in emit_rsqrt(). + */ + sse2_rcpps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -1074,10 +767,44 @@ emit_rsqrt( unsigned xmm_dst, unsigned xmm_src ) { - emit_rsqrtps( +#if HIGH_PRECISION + /* Although rsqrtps() and rcpps() are low precision on some/all SSE + * implementations, it is possible to improve its precision at + * fairly low cost, using a newton/raphson step, as below: + * + * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) + * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * + * See: http://softwarecommunity.intel.com/articles/eng/1818.htm + */ + { + struct x86_reg dst = make_xmm( xmm_dst ); + struct x86_reg src = make_xmm( xmm_src ); + struct x86_reg tmp0 = make_xmm( 2 ); + struct x86_reg tmp1 = make_xmm( 3 ); + + assert( xmm_dst != xmm_src ); + assert( xmm_dst != 2 && xmm_dst != 3 ); + assert( xmm_src != 2 && xmm_src != 3 ); + + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + sse_rsqrtps( func, tmp1, src ); + sse_mulps( func, src, tmp1 ); + sse_mulps( func, dst, tmp1 ); + sse_mulps( func, src, tmp1 ); + sse_subps( func, tmp0, src ); + sse_mulps( func, dst, tmp0 ); + } +#else + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. + */ + sse_rsqrtps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); +#endif } static void @@ -1085,7 +812,7 @@ emit_setsign( struct x86_function *func, unsigned xmm ) { - emit_orps( + sse_orps( func, make_xmm( xmm ), get_temp( @@ -1097,18 +824,12 @@ static void XSTDCALL sin4f( float *store ) { -#ifdef WIN32 - store[0] = (float) sin( (double) store[0] ); - store[1] = (float) sin( (double) store[1] ); - store[2] = (float) sin( (double) store[2] ); - store[3] = (float) sin( (double) store[3] ); -#else - const unsigned X = TEMP_R0 * 16; + const unsigned X = 0; + store[X + 0] = sinf( store[X + 0] ); store[X + 1] = sinf( store[X + 1] ); store[X + 2] = sinf( store[X + 2] ); store[X + 3] = sinf( store[X + 3] ); -#endif } static void @@ -1127,7 +848,7 @@ emit_sub( unsigned xmm_dst, unsigned xmm_src ) { - emit_subps( + sse_subps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -1160,6 +881,14 @@ emit_fetch( swizzle ); break; + case TGSI_FILE_IMMEDIATE: + emit_immediate( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + case TGSI_FILE_INPUT: emit_inputf( func, @@ -1328,16 +1057,16 @@ emit_kil( } } - emit_push( + x86_push( func, x86_make_reg( file_REG32, reg_AX ) ); - emit_push( + x86_push( func, x86_make_reg( file_REG32, reg_DX ) ); FOR_EACH_CHANNEL( chan_index ) { if( uniquemask & (1 << chan_index) ) { - emit_cmpps( + sse_cmpps( func, make_xmm( registers[chan_index] ), get_temp( @@ -1346,17 +1075,17 @@ emit_kil( cc_LessThan ); if( chan_index == firstchan ) { - emit_pmovmskb( + sse_pmovmskb( func, x86_make_reg( file_REG32, reg_AX ), make_xmm( registers[chan_index] ) ); } else { - emit_pmovmskb( + sse_pmovmskb( func, x86_make_reg( file_REG32, reg_DX ), make_xmm( registers[chan_index] ) ); - emit_or( + x86_or( func, x86_make_reg( file_REG32, reg_AX ), x86_make_reg( file_REG32, reg_DX ) ); @@ -1364,17 +1093,17 @@ emit_kil( } } - emit_or( + x86_or( func, get_temp( TGSI_EXEC_TEMP_KILMASK_I, TGSI_EXEC_TEMP_KILMASK_C ), x86_make_reg( file_REG32, reg_AX ) ); - emit_pop( + x86_pop( func, x86_make_reg( file_REG32, reg_DX ) ); - emit_pop( + x86_pop( func, x86_make_reg( file_REG32, reg_AX ) ); } @@ -1390,12 +1119,12 @@ emit_setcc( FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); - emit_cmpps( + sse_cmpps( func, make_xmm( 0 ), make_xmm( 1 ), cc ); - emit_andps( + sse_andps( func, make_xmm( 0 ), get_temp( @@ -1416,22 +1145,22 @@ emit_cmp( FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); FETCH( func, *inst, 2, 2, chan_index ); - emit_cmpps( + sse_cmpps( func, make_xmm( 0 ), get_temp( TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ), cc_LessThan ); - emit_andps( + sse_andps( func, make_xmm( 1 ), make_xmm( 0 ) ); - emit_andnps( + sse_andnps( func, make_xmm( 0 ), make_xmm( 2 ) ); - emit_orps( + sse_orps( func, make_xmm( 0 ), make_xmm( 1 ) ); @@ -1448,11 +1177,16 @@ emit_instruction( switch( inst->Instruction.Opcode ) { case TGSI_OPCODE_ARL: +#if 0 + /* XXX this isn't working properly (see glean vertProg1 test) */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } +#else + return 0; +#endif break; case TGSI_OPCODE_MOV: @@ -1482,7 +1216,7 @@ emit_instruction( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { FETCH( func, *inst, 0, 0, CHAN_X ); - emit_maxps( + sse_maxps( func, make_xmm( 0 ), get_temp( @@ -1491,21 +1225,26 @@ emit_instruction( STORE( func, *inst, 0, 0, CHAN_Y ); } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + /* XMM[1] = SrcReg[0].yyyy */ FETCH( func, *inst, 1, 0, CHAN_Y ); - emit_maxps( + /* XMM[1] = max(XMM[1], 0) */ + sse_maxps( func, make_xmm( 1 ), get_temp( TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ) ); + /* XMM[2] = SrcReg[0].wwww */ FETCH( func, *inst, 2, 0, CHAN_W ); - emit_minps( + /* XMM[2] = min(XMM[2], 128.0) */ + sse_minps( func, make_xmm( 2 ), get_temp( TGSI_EXEC_TEMP_128_I, TGSI_EXEC_TEMP_128_C ) ); - emit_maxps( + /* XMM[2] = max(XMM[2], -128.0) */ + sse_maxps( func, make_xmm( 2 ), get_temp( @@ -1513,16 +1252,16 @@ emit_instruction( TGSI_EXEC_TEMP_MINUS_128_C ) ); emit_pow( func, 1, 2 ); FETCH( func, *inst, 0, 0, CHAN_X ); - emit_xorps( + sse_xorps( func, make_xmm( 2 ), make_xmm( 2 ) ); - emit_cmpps( + sse_cmpps( func, make_xmm( 2 ), make_xmm( 0 ), cc_LessThanEqual ); - emit_andps( + sse_andps( func, make_xmm( 2 ), make_xmm( 1 ) ); @@ -1543,9 +1282,9 @@ emit_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( func, *inst, 0, 0, CHAN_X ); - emit_rsqrt( func, 0, 0 ); + emit_rsqrt( func, 1, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 1, 0, chan_index ); } break; @@ -1644,7 +1383,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); - emit_minps( + sse_minps( func, make_xmm( 0 ), make_xmm( 1 ) ); @@ -1656,7 +1395,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); - emit_maxps( + sse_maxps( func, make_xmm( 0 ), make_xmm( 1 ) ); @@ -1997,7 +1736,6 @@ emit_instruction( break; case TGSI_OPCODE_RET: - case TGSI_OPCODE_END: #ifdef WIN32 emit_retw( func, 16 ); #else @@ -2005,6 +1743,9 @@ emit_instruction( #endif break; + case TGSI_OPCODE_END: + break; + case TGSI_OPCODE_SSG: return 0; break; @@ -2020,7 +1761,7 @@ emit_instruction( STORE( func, *inst, 0, 0, CHAN_X ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 0, 0, CHAN_X ); emit_sin( func, 0 ); STORE( func, *inst, 0, 0, CHAN_Y ); } @@ -2236,149 +1977,289 @@ emit_declaration( } } -unsigned -tgsi_emit_sse2( - struct tgsi_token *tokens, - struct x86_function *func ) -{ - struct tgsi_parse_context parse; - unsigned ok = 1; - - DUMP_START(); - - func->csr = func->store; - - emit_mov( - func, - get_input_base(), - get_argument( 0 ) ); - emit_mov( - func, - get_output_base(), - get_argument( 1 ) ); - emit_mov( - func, - get_const_base(), - get_argument( 2 ) ); - emit_mov( - func, - get_temp_base(), - get_argument( 3 ) ); - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - ok = emit_instruction( - func, - &parse.FullToken.FullInstruction ); - - if (!ok) { - debug_printf("failed to translate tgsi opcode %d to SSE\n", - parse.FullToken.FullInstruction.Instruction.Opcode ); - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX implement this */ - ok = 0; - debug_printf("failed to emit immediate value to SSE\n"); - break; - - default: - assert( 0 ); - ok = 0; - break; - } +static void aos_to_soa( struct x86_function *func, + uint arg_aos, + uint arg_soa, + uint arg_num, + uint arg_stride ) +{ + struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); + struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); + struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); + int inner_loop; + + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); + x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); + x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); + x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); + x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); } + /* while --num_inputs */ + x86_dec( func, num_inputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_input ); +} + +static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_output; + struct x86_reg aos_output; + struct x86_reg num_outputs; + struct x86_reg temp; + int inner_loop; + + soa_output = x86_make_reg( file_REG32, reg_AX ); + aos_output = x86_make_reg( file_REG32, reg_BX ); + num_outputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, aos_output ); + + x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); + x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); + x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, x86_fn_arg( func, stride ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + x86_lea( func, aos_output, x86_make_disp(aos_output, 16) ); + x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); + } + /* while --num_outputs */ + x86_dec( func, num_outputs ); + x86_jcc( func, cc_NE, inner_loop ); - tgsi_parse_free( &parse ); - - DUMP_END(); - - return ok; + /* Restore EBX */ + x86_pop( func, aos_output ); } /** - * Fragment shaders are responsible for interpolating shader inputs. Because on - * x86 we have only 4 GP registers, and here we have 5 shader arguments (input, - * output, const, temp and coef), the code is split into two phases -- - * DECLARATION and INSTRUCTION phase. - * GP register holding the output argument is aliased with the coeff argument, - * as outputs are not needed in the DECLARATION phase. + * Translate a TGSI vertex/fragment shader to SSE2 code. + * Slightly different things are done for vertex vs. fragment shaders. + * + * Note that fragment shaders are responsible for interpolating shader + * inputs. Because on x86 we have only 4 GP registers, and here we + * have 5 shader arguments (input, output, const, temp and coef), the + * code is split into two phases -- DECLARATION and INSTRUCTION phase. + * GP register holding the output argument is aliased with the coeff + * argument, as outputs are not needed in the DECLARATION phase. + * + * \param tokens the TGSI input shader + * \param func the output SSE code/function + * \param immediates buffer to place immediates, later passed to SSE func + * \param return 1 for success, 0 if translation failed */ unsigned -tgsi_emit_sse2_fs( - struct tgsi_token *tokens, - struct x86_function *func ) +tgsi_emit_sse2( + const struct tgsi_token *tokens, + struct x86_function *func, + float (*immediates)[4], + boolean do_swizzles ) { struct tgsi_parse_context parse; boolean instruction_phase = FALSE; unsigned ok = 1; - - DUMP_START(); + uint num_immediates = 0; func->csr = func->store; - /* DECLARATION phase, do not load output argument. */ - emit_mov( - func, - get_input_base(), - get_argument( 0 ) ); - emit_mov( - func, - get_const_base(), - get_argument( 2 ) ); - emit_mov( + tgsi_parse_init( &parse, tokens ); + + /* Can't just use EDI, EBX without save/restoring them: + */ + x86_push( func, - get_temp_base(), - get_argument( 3 ) ); - emit_mov( + get_immediate_base() ); + + x86_push( func, - get_coef_base(), - get_argument( 4 ) ); + get_temp_base() ); - tgsi_parse_init( &parse, tokens ); + + /* + * Different function args for vertex/fragment shaders: + */ + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + /* DECLARATION phase, do not load output argument. */ + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + /* skipping outputs argument here */ + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_coef_base(), + x86_fn_arg( func, 5 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 6 ) ); + } + else { + assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + + if (do_swizzles) + aos_to_soa( func, + 6, /* aos_input */ + 1, /* machine->input */ + 7, /* num_inputs */ + 8 ); /* input_stride */ + + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 5 ) ); + } while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - emit_declaration( - func, - &parse.FullToken.FullDeclaration ); + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration( + func, + &parse.FullToken.FullDeclaration ); + } break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if( !instruction_phase ) { - /* INSTRUCTION phase, overwrite coeff with output. */ - instruction_phase = TRUE; - emit_mov( - func, - get_output_base(), - get_argument( 1 ) ); + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + } } + ok = emit_instruction( func, &parse.FullToken.FullInstruction ); if (!ok) { - debug_printf("failed to translate tgsi opcode %d to SSE\n", - parse.FullToken.FullInstruction.Instruction.Opcode ); + debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? + "vertex shader" : "fragment shader"); } break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX implement this */ - ok = 0; - debug_printf("failed to emit immediate value to SSE\n"); + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } +#if 0 + debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", + num_immediates, + immediates[num_immediates][0], + immediates[num_immediates][1], + immediates[num_immediates][2], + immediates[num_immediates][3]); +#endif + num_immediates++; + } break; default: @@ -2387,11 +2268,30 @@ tgsi_emit_sse2_fs( } } - tgsi_parse_free( &parse ); + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + if (do_swizzles) + soa_to_aos( func, 9, 2, 10, 11 ); + } - DUMP_END(); + /* Can't just use EBX, EDI without save/restoring them: + */ + x86_pop( + func, + get_temp_base() ); + + x86_pop( + func, + get_immediate_base() ); + +#ifdef WIN32 + emit_retw( func, 16 ); +#else + emit_ret( func ); +#endif + + tgsi_parse_free( &parse ); return ok; } -#endif /* i386 */ +#endif /* PIPE_ARCH_X86 */ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index 63b8ef3911..e66d115283 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -10,13 +10,10 @@ struct x86_function; unsigned tgsi_emit_sse2( - struct tgsi_token *tokens, - struct x86_function *function ); - -unsigned -tgsi_emit_sse2_fs( - struct tgsi_token *tokens, - struct x86_function *function ); + const struct tgsi_token *tokens, + struct x86_function *function, + float (*immediates)[4], + boolean do_swizzles ); #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 7d292778ad..4c65ffd780 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -25,205 +25,36 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_string.h" #include "tgsi_dump.h" #include "tgsi_parse.h" #include "tgsi_build.h" -struct gen_dump -{ - unsigned tabs; - void (* write)( - struct gen_dump *dump, - const void *data, - unsigned size ); -}; - -struct text_dump -{ - struct gen_dump base; - char *text; - unsigned length; - unsigned capacity; -}; - -static void -_text_dump_write( - struct gen_dump *dump, - const void *data, - unsigned size ) -{ - struct text_dump *td = (struct text_dump *) dump; - unsigned new_length = td->length + size; - - if( new_length >= td->capacity ) { - unsigned new_capacity = td->capacity; - - do { - if( new_capacity == 0 ) { - new_capacity = 256; - } - else { - new_capacity *= 2; - } - } while( new_length >= new_capacity ); - td->text = (char *) REALLOC( - td->text, - td->capacity, - new_capacity ); - td->capacity = new_capacity; - } - memcpy( - &td->text[td->length], - data, - size ); - td->length = new_length; - td->text[td->length] = '\0'; -} - -struct file_dump -{ - struct gen_dump base; - FILE *file; -}; - -static void -_file_dump_write( - struct gen_dump *dump, - const void *data, - unsigned size ) -{ - struct file_dump *fd = (struct file_dump *) dump; - -#if 0 - fwrite( data, 1, size, fd->file ); -#else - { - unsigned i; - - for (i = 0; i < size; i++ ) { - fprintf( fd->file, "%c", ((const char *) data)[i] ); - } - } -#endif -} - -static void -gen_dump_str( - struct gen_dump *dump, - const char *str ) -{ - unsigned i; - size_t len = strlen( str ); - - for (i = 0; i < len; i++) { - dump->write( dump, &str[i], 1 ); - if (str[i] == '\n') { - unsigned i; - - for (i = 0; i < dump->tabs; i++) { - dump->write( dump, " ", 4 ); - } - } - } -} - static void -gen_dump_chr( - struct gen_dump *dump, - const char chr ) -{ - dump->write( dump, &chr, 1 ); -} - -static void -gen_dump_uix( - struct gen_dump *dump, - const unsigned ui ) -{ - char str[36]; - - sprintf( str, "0x%x", ui ); - gen_dump_str( dump, str ); -} - -static void -gen_dump_uid( - struct gen_dump *dump, - const unsigned ui ) -{ - char str[16]; - - sprintf( str, "%u", ui ); - gen_dump_str( dump, str ); -} - -static void -gen_dump_sid( - struct gen_dump *dump, - const int si ) -{ - char str[16]; - - sprintf( str, "%d", si ); - gen_dump_str( dump, str ); -} - -static void -gen_dump_flt( - struct gen_dump *dump, - const float flt ) -{ - char str[48]; - - sprintf( str, "%10.4f", flt ); - gen_dump_str( dump, str ); -} - -static void -gen_dump_enum( - struct gen_dump *dump, +dump_enum( const unsigned e, const char **enums, const unsigned enums_count ) { if (e >= enums_count) { - gen_dump_uid( dump, e ); + debug_printf( "%u", e ); } else { - gen_dump_str( dump, enums[e] ); + debug_printf( "%s", enums[e] ); } } -static void -gen_dump_tab( - struct gen_dump *dump ) -{ - ++dump->tabs; -} - -static void -gen_dump_untab( - struct gen_dump *dump ) -{ - assert( dump->tabs > 0 ); - - --dump->tabs; -} - -#define TXT(S) gen_dump_str( dump, S ) -#define CHR(C) gen_dump_chr( dump, C ) -#define UIX(I) gen_dump_uix( dump, I ) -#define UID(I) gen_dump_uid( dump, I ) -#define SID(I) gen_dump_sid( dump, I ) -#define FLT(F) gen_dump_flt( dump, F ) -#define TAB() gen_dump_tab( dump ) -#define UNT() gen_dump_untab( dump ) -#define ENM(E,ENUMS) gen_dump_enum( dump, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) +#define EOL() debug_printf( "\n" ) +#define TXT(S) debug_printf( "%s", S ) +#define CHR(C) debug_printf( "%c", C ) +#define UIX(I) debug_printf( "0x%x", I ) +#define UID(I) debug_printf( "%u", I ) +#define SID(I) debug_printf( "%d", I ) +#define FLT(F) debug_printf( "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) static const char *TGSI_PROCESSOR_TYPES[] = { @@ -710,7 +541,6 @@ static const char *TGSI_MODULATES[] = static void dump_declaration_short( - struct gen_dump *dump, struct tgsi_full_declaration *decl ) { TXT( "\nDCL " ); @@ -746,23 +576,24 @@ dump_declaration_short( } } - if( decl->Declaration.Interpolate ) { + if (decl->Declaration.Semantic) { TXT( ", " ); - ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); + ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT ); + if (decl->Semantic.SemanticIndex != 0) { + CHR( '[' ); + UID( decl->Semantic.SemanticIndex ); + CHR( ']' ); + } } - if( decl->Declaration.Semantic ) { + if (decl->Declaration.Interpolate) { TXT( ", " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT ); - CHR( '[' ); - UID( decl->Semantic.SemanticIndex ); - CHR( ']' ); + ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); } } static void dump_declaration_verbose( - struct gen_dump *dump, struct tgsi_full_declaration *decl, unsigned ignored, unsigned deflt, @@ -800,7 +631,7 @@ dump_declaration_verbose( UIX( decl->Declaration.Padding ); } - CHR( '\n' ); + EOL(); switch( decl->Declaration.Declare ) { case TGSI_DECLARE_RANGE: TXT( "\nFirst: " ); @@ -819,7 +650,7 @@ dump_declaration_verbose( } if( decl->Declaration.Interpolate ) { - CHR( '\n' ); + EOL(); TXT( "\nInterpolate: " ); ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES ); if( ignored ) { @@ -829,7 +660,7 @@ dump_declaration_verbose( } if( decl->Declaration.Semantic ) { - CHR( '\n' ); + EOL(); TXT( "\nSemanticName : " ); ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); TXT( "\nSemanticIndex: " ); @@ -843,7 +674,6 @@ dump_declaration_verbose( static void dump_immediate_short( - struct gen_dump *dump, struct tgsi_full_immediate *imm ) { unsigned i; @@ -871,7 +701,6 @@ dump_immediate_short( static void dump_immediate_verbose( - struct gen_dump *dump, struct tgsi_full_immediate *imm, unsigned ignored ) { @@ -885,7 +714,7 @@ dump_immediate_verbose( } for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - CHR( '\n' ); + EOL(); switch( imm->Immediate.DataType ) { case TGSI_IMM_FLOAT32: TXT( "\nFloat: " ); @@ -900,14 +729,13 @@ dump_immediate_verbose( static void dump_instruction_short( - struct gen_dump *dump, struct tgsi_full_instruction *inst, unsigned instno ) { unsigned i; boolean first_reg = TRUE; - CHR( '\n' ); + EOL(); UID( instno ); CHR( ':' ); ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT ); @@ -939,6 +767,31 @@ dump_instruction_short( SID( dst->DstRegister.Index ); CHR( ']' ); + switch (dst->DstRegisterExtModulate.Modulate) { + case TGSI_MODULATE_1X: + break; + case TGSI_MODULATE_2X: + TXT( "_2X" ); + break; + case TGSI_MODULATE_4X: + TXT( "_4X" ); + break; + case TGSI_MODULATE_8X: + TXT( "_8X" ); + break; + case TGSI_MODULATE_HALF: + TXT( "_D2" ); + break; + case TGSI_MODULATE_QUARTER: + TXT( "_D4" ); + break; + case TGSI_MODULATE_EIGHTH: + TXT( "_D8" ); + break; + default: + assert( 0 ); + } + if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) { CHR( '.' ); if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) { @@ -966,18 +819,18 @@ dump_instruction_short( } CHR( ' ' ); - if( src->SrcRegisterExtMod.Complement ) { - TXT( "(1 - " ); - } - if( src->SrcRegisterExtMod.Negate ) { - CHR( '-' ); - } - if( src->SrcRegisterExtMod.Absolute ) { + if (src->SrcRegisterExtMod.Negate) + TXT( "-(" ); + if (src->SrcRegisterExtMod.Absolute) CHR( '|' ); - } - if( src->SrcRegister.Negate ) { + if (src->SrcRegisterExtMod.Scale2X) + TXT( "2*(" ); + if (src->SrcRegisterExtMod.Bias) + CHR( '(' ); + if (src->SrcRegisterExtMod.Complement) + TXT( "1-(" ); + if (src->SrcRegister.Negate) CHR( '-' ); - } ENM( src->SrcRegister.File, TGSI_FILES_SHORT ); @@ -985,35 +838,37 @@ dump_instruction_short( SID( src->SrcRegister.Index ); CHR( ']' ); - if (src->SrcRegister.Extended) { - if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { - CHR( '.' ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT ); - } - } - else if( src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ) { + if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT ); ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT ); ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT ); ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT ); } + if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { + CHR( '.' ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT ); + } - if( src->SrcRegisterExtMod.Absolute ) { + if (src->SrcRegisterExtMod.Complement) + CHR( ')' ); + if (src->SrcRegisterExtMod.Bias) + TXT( ")-.5" ); + if (src->SrcRegisterExtMod.Scale2X) + CHR( ')' ); + if (src->SrcRegisterExtMod.Absolute) CHR( '|' ); - } - if( src->SrcRegisterExtMod.Complement ) { + if (src->SrcRegisterExtMod.Negate) CHR( ')' ); - } first_reg = FALSE; } @@ -1037,7 +892,6 @@ dump_instruction_short( static void dump_instruction_verbose( - struct gen_dump *dump, struct tgsi_full_instruction *inst, unsigned ignored, unsigned deflt, @@ -1065,7 +919,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { @@ -1119,7 +973,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { @@ -1137,7 +991,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { @@ -1158,7 +1012,7 @@ dump_instruction_verbose( struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; - CHR( '\n' ); + EOL(); TXT( "\nFile : " ); ENM( dst->DstRegister.File, TGSI_FILES ); if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { @@ -1189,7 +1043,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { @@ -1227,7 +1081,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { @@ -1249,7 +1103,7 @@ dump_instruction_verbose( struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; - CHR( '\n' ); + EOL(); TXT( "\nFile : "); ENM( src->SrcRegister.File, TGSI_FILES ); if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { @@ -1294,7 +1148,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { @@ -1340,7 +1194,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { @@ -1375,9 +1229,8 @@ dump_instruction_verbose( } } -static void -dump_gen( - struct gen_dump *dump, +void +tgsi_dump( const struct tgsi_token *tokens, unsigned flags ) { @@ -1389,18 +1242,17 @@ dump_gen( unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT); unsigned instno = 0; - dump->tabs = 0; - - /* sanity check */ + /* sanity checks */ assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); + assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0); tgsi_parse_init( &parse, tokens ); TXT( "tgsi-dump begin -----------------" ); - CHR( '\n' ); + EOL(); ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT ); - CHR( ' ' ); UID( parse.FullVersion.Version.MajorVersion ); CHR( '.' ); UID( parse.FullVersion.Version.MinorVersion ); @@ -1410,7 +1262,7 @@ dump_gen( UID( parse.FullVersion.Version.MajorVersion ); TXT( "\nMinorVersion: " ); UID( parse.FullVersion.Version.MinorVersion ); - CHR( '\n' ); + EOL(); TXT( "\nHeaderSize: " ); UID( parse.FullHeader.Header.HeaderSize ); @@ -1418,7 +1270,7 @@ dump_gen( UID( parse.FullHeader.Header.BodySize ); TXT( "\nProcessor : " ); ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); - CHR( '\n' ); + EOL(); } fi = tgsi_default_full_instruction(); @@ -1430,19 +1282,16 @@ dump_gen( switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: dump_declaration_short( - dump, &parse.FullToken.FullDeclaration ); break; case TGSI_TOKEN_TYPE_IMMEDIATE: dump_immediate_short( - dump, &parse.FullToken.FullImmediate ); break; case TGSI_TOKEN_TYPE_INSTRUCTION: dump_instruction_short( - dump, &parse.FullToken.FullInstruction, instno ); instno++; @@ -1467,7 +1316,6 @@ dump_gen( switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: dump_declaration_verbose( - dump, &parse.FullToken.FullDeclaration, ignored, deflt, @@ -1476,14 +1324,12 @@ dump_gen( case TGSI_TOKEN_TYPE_IMMEDIATE: dump_immediate_verbose( - dump, &parse.FullToken.FullImmediate, ignored ); break; case TGSI_TOKEN_TYPE_INSTRUCTION: dump_instruction_verbose( - dump, &parse.FullToken.FullInstruction, ignored, deflt, @@ -1494,7 +1340,7 @@ dump_gen( assert( 0 ); } - CHR( '\n' ); + EOL(); } } @@ -1502,86 +1348,3 @@ dump_gen( tgsi_parse_free( &parse ); } - - -static void -sanity_checks(void) -{ - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); - assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0); -} - - -void -tgsi_dump( - const struct tgsi_token *tokens, - unsigned flags ) -{ - struct file_dump dump; - - sanity_checks(); - - dump.base.write = _file_dump_write; -#if 0 - { - static unsigned counter = 0; - char buffer[64]; - sprintf( buffer, "tgsi-dump-%.4u.txt", counter++ ); - dump.file = fopen( buffer, "wt" ); - } -#else - dump.file = stderr; -#endif - - dump_gen( - &dump.base, - tokens, - flags ); - -#if 0 - fclose( dump.file ); -#endif -} - -void -tgsi_dump_str( - char **str, - const struct tgsi_token *tokens, - unsigned flags ) -{ - struct text_dump dump; - - dump.base.write = _text_dump_write; - dump.text = NULL; - dump.length = 0; - dump.capacity = 0; - - dump_gen( - &dump.base, - tokens, - flags ); - - *str = dump.text; -} - - -void tgsi_debug_dump( struct tgsi_token *tokens ) -{ - char *str, *p; - - tgsi_dump_str( &str, tokens, 0 ); - - p = str; - while (p != NULL) - { - char *end = strchr( p, '\n' ); - if (end != NULL) - { - *end++ = '\0'; - } - debug_printf( "%s\n", p ); - p = end; - } - - FREE( str ); -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h index 51d79a0362..beb0155d56 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -14,16 +14,6 @@ tgsi_dump( const struct tgsi_token *tokens, unsigned flags ); -void -tgsi_dump_str( - char **str, - const struct tgsi_token *tokens, - unsigned flags ); - -/* Dump to debug_printf() - */ -void tgsi_debug_dump( struct tgsi_token *tokens ); - #if defined __cplusplus } #endif diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c index c3526cb71f..5c0b0bfd61 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c @@ -43,7 +43,7 @@ tgsi_full_token_free( union tgsi_full_token *full_token ) { if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) { - FREE( full_token->FullImmediate.u.Pointer ); + FREE( (void *) full_token->FullImmediate.u.Pointer ); } } @@ -156,7 +156,7 @@ tgsi_parse_token( imm->u.Pointer = MALLOC( sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - next_token( ctx, &imm->u.ImmediateFloat32[i] ); + next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); } break; @@ -330,3 +330,18 @@ tgsi_num_tokens(const struct tgsi_token *tokens) } return 0; } + + +/** + * Make a new copy of a token array. + */ +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens) +{ + unsigned n = tgsi_num_tokens(tokens); + unsigned bytes = n * sizeof(struct tgsi_token); + struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes); + if (new_tokens) + memcpy(new_tokens, tokens, bytes); + return new_tokens; +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index a98e88e343..4102101093 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -1,6 +1,35 @@ -#if !defined TGSI_PARSE_H +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PARSE_H #define TGSI_PARSE_H +#include "pipe/p_shader_tokens.h" + #if defined __cplusplus extern "C" { #endif @@ -50,8 +79,8 @@ struct tgsi_full_immediate struct tgsi_immediate Immediate; union { - void *Pointer; - struct tgsi_immediate_float32 *ImmediateFloat32; + const void *Pointer; + const struct tgsi_immediate_float32 *ImmediateFloat32; } u; }; @@ -116,6 +145,8 @@ tgsi_parse_token( unsigned tgsi_num_tokens(const struct tgsi_token *tokens); +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens); #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index ea4a72967d..65650ed22a 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -103,18 +103,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_max[file] = MAX2(info->file_max[file], (int)i); if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[info->num_inputs] - = (ubyte)fulldecl->Semantic.SemanticName; - info->input_semantic_index[info->num_inputs] - = (ubyte)fulldecl->Semantic.SemanticIndex; + info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; + info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_inputs++; } if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[info->num_outputs] - = (ubyte)fulldecl->Semantic.SemanticName; - info->output_semantic_index[info->num_outputs] - = (ubyte)fulldecl->Semantic.SemanticIndex; + info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; + info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_outputs++; } @@ -137,6 +133,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } + assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs ); + assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs ); + info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || info->opcode_count[TGSI_OPCODE_KILP]); diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile new file mode 100644 index 0000000000..ad2a5b705e --- /dev/null +++ b/src/gallium/auxiliary/translate/Makefile @@ -0,0 +1,15 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = translate + +C_SOURCES = \ + translate_generic.c \ + translate_sse.c \ + translate.c \ + translate_cache.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript new file mode 100644 index 0000000000..9553a67537 --- /dev/null +++ b/src/gallium/auxiliary/translate/SConscript @@ -0,0 +1,12 @@ +Import('*') + +translate = env.ConvenienceLibrary( + target = 'translate', + source = [ + 'translate_generic.c', + 'translate_sse.c', + 'translate.c', + 'translate_cache.c', + ]) + +auxiliaries.insert(0, translate) diff --git a/src/gallium/auxiliary/draw/draw_debug.c b/src/gallium/auxiliary/translate/translate.c index d6220b5f62..b04bc6eefd 100644 --- a/src/gallium/auxiliary/draw/draw_debug.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -30,84 +30,19 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "draw_private.h" -#include "draw_context.h" +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "translate.h" - - -static void -draw_prim_info(unsigned prim, unsigned *first, unsigned *incr) -{ - assert(prim >= PIPE_PRIM_POINTS); - assert(prim <= PIPE_PRIM_POLYGON); - - switch (prim) { - case PIPE_PRIM_POINTS: - *first = 1; - *incr = 1; - break; - case PIPE_PRIM_LINES: - *first = 2; - *incr = 2; - break; - case PIPE_PRIM_LINE_STRIP: - *first = 2; - *incr = 1; - break; - case PIPE_PRIM_LINE_LOOP: - *first = 2; - *incr = 1; - break; - case PIPE_PRIM_TRIANGLES: - *first = 3; - *incr = 3; - break; - case PIPE_PRIM_TRIANGLE_STRIP: - *first = 3; - *incr = 1; - break; - case PIPE_PRIM_TRIANGLE_FAN: - case PIPE_PRIM_POLYGON: - *first = 3; - *incr = 1; - break; - case PIPE_PRIM_QUADS: - *first = 4; - *incr = 4; - break; - case PIPE_PRIM_QUAD_STRIP: - *first = 4; - *incr = 2; - break; - default: - assert(0); - *first = 1; - *incr = 1; - break; - } -} - - -unsigned -draw_trim_prim( unsigned mode, unsigned count ) +struct translate *translate_create( const struct translate_key *key ) { - unsigned length, first, incr; + struct translate *translate = NULL; - draw_prim_info( mode, &first, &incr ); +#if defined(__i386__) || defined(__386__) || defined(i386) + translate = translate_sse2_create( key ); + if (translate) + return translate; +#endif - if (count < first) - length = 0; - else - length = count - (count - first) % incr; - - return length; -} - - -boolean -draw_validate_prim( unsigned mode, unsigned count ) -{ - return (count > 0 && - count == draw_trim_prim( mode, count )); + return translate_generic_create( key ); } - diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h new file mode 100644 index 0000000000..b8210af50c --- /dev/null +++ b/src/gallium/auxiliary/translate/translate.h @@ -0,0 +1,127 @@ +/* + * Copyright 2008 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/** + * Vertex fetch/store/convert code. This functionality is used in two places: + * 1. Vertex fetch/convert - to grab vertex data from incoming vertex + * arrays and convert to format needed by vertex shaders. + * 2. Vertex store/emit - to convert simple float[][4] vertex attributes + * (which is the organization used throughout the draw/prim pipeline) to + * hardware-specific formats and emit into hardware vertex buffers. + * + * + * Authors: + * Keith Whitwell <keithw@tungstengraphics.com> + */ + +#ifndef _TRANSLATE_H +#define _TRANSLATE_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" + +struct translate_element +{ + enum pipe_format input_format; + enum pipe_format output_format; + unsigned input_buffer; + unsigned input_offset; /* can't really reduce the size of these */ + unsigned output_offset; +}; + + +struct translate_key { + unsigned output_stride; + unsigned nr_elements; + struct translate_element element[PIPE_MAX_ATTRIBS]; +}; + + +struct translate { + struct translate_key key; + + void (*release)( struct translate * ); + + void (*set_buffer)( struct translate *, + unsigned i, + const void *ptr, + unsigned stride ); + + void (*run_elts)( struct translate *, + const unsigned *elts, + unsigned count, + void *output_buffer); + + void (*run)( struct translate *, + unsigned start, + unsigned count, + void *output_buffer); +}; + + + +#if 0 +struct translate_context *translate_context_create( void ); +void translate_context_destroy( struct translate_context * ); + +struct translate *translate_lookup_or_create( struct translate_context *tctx, + const struct translate_key *key ); +#endif + + +struct translate *translate_create( const struct translate_key *key ); + +static INLINE int translate_keysize( const struct translate_key *key ) +{ + return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element); +} + +static INLINE int translate_key_compare( const struct translate_key *a, + const struct translate_key *b ) +{ + int keysize = translate_keysize(a); + return memcmp(a, b, keysize); +} + + +static INLINE void translate_key_sanitize( struct translate_key *a ) +{ + int keysize = translate_keysize(a); + char *ptr = (char *)a; + memset(ptr + keysize, 0, sizeof(*a) - keysize); +} + + +/******************************************************************************* + * Private: + */ +struct translate *translate_sse2_create( const struct translate_key *key ); + +struct translate *translate_generic_create( const struct translate_key *key ); + + +#endif diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c new file mode 100644 index 0000000000..115dc9287e --- /dev/null +++ b/src/gallium/auxiliary/translate/translate_cache.c @@ -0,0 +1,102 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "translate.h" +#include "translate_cache.h" + +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct translate_cache { + struct cso_hash *hash; +}; + +struct translate_cache * translate_cache_create( void ) +{ + struct translate_cache *cache = MALLOC_STRUCT(translate_cache); + cache->hash = cso_hash_create(); + return cache; +} + + +static INLINE void delete_translates(struct translate_cache *cache) +{ + struct cso_hash *hash = cache->hash; + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + struct translate *state = (struct translate*)cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + state->release(state); + } + } +} + +void translate_cache_destroy(struct translate_cache *cache) +{ + delete_translates(cache); + cso_hash_delete(cache->hash); + FREE(cache); +} + + +static INLINE unsigned translate_hash_key_size(struct translate_key *key) +{ + unsigned size = sizeof(struct translate_key) - + sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); + return size; +} + +static INLINE unsigned create_key(struct translate_key *key) +{ + unsigned hash_key; + unsigned size = translate_hash_key_size(key); + /*debug_printf("key size = %d, (els = %d)\n", + size, key->nr_elements);*/ + hash_key = cso_construct_key(key, size); + return hash_key; +} + +struct translate * translate_cache_find(struct translate_cache *cache, + struct translate_key *key) +{ + unsigned hash_key = create_key(key); + struct translate *translate = (struct translate*) + cso_hash_find_data_from_template(cache->hash, + hash_key, + key, sizeof(*key)); + + if (!translate) { + /* create/insert */ + translate = translate_create(key); + cso_hash_insert(cache->hash, hash_key, translate); + } + + return translate; +} diff --git a/src/gallium/auxiliary/translate/translate_cache.h b/src/gallium/auxiliary/translate/translate_cache.h new file mode 100644 index 0000000000..7dba871e57 --- /dev/null +++ b/src/gallium/auxiliary/translate/translate_cache.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _TRANSLATE_CACHE_H +#define _TRANSLATE_CACHE_H + + +/******************************************************************************* + * Translate cache. + * Simply used to cache created translates. Avoids unecessary creation of + * translate's if one suitable for a given translate_key has already been + * created. + * + * Note: this functionality depends and requires the CSO module. + */ +struct translate_cache; + +struct translate_key; +struct translate; + +struct translate_cache *translate_cache_create( void ); +void translate_cache_destroy(struct translate_cache *cache); + +/** + * Will try to find a translate structure matched by the given key. + * If such a structure doesn't exist in the cache the function + * will automatically create it, insert it in the cache and + * return the created version. + * + */ +struct translate *translate_cache_find(struct translate_cache *cache, + struct translate_key *key); + +#endif diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c new file mode 100644 index 0000000000..402780ee53 --- /dev/null +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -0,0 +1,676 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "translate.h" + + +#define DRAW_DBG 0 + +typedef void (*fetch_func)(const void *ptr, float *attrib); +typedef void (*emit_func)(const float *attrib, void *ptr); + + + +struct translate_generic { + struct translate translate; + + struct { + fetch_func fetch; + unsigned buffer; + unsigned input_offset; + + emit_func emit; + unsigned output_offset; + + char *input_ptr; + unsigned input_stride; + + } attrib[PIPE_MAX_ATTRIBS]; + + unsigned nr_attrib; +}; + + +static struct translate_generic *translate_generic( struct translate *translate ) +{ + return (struct translate_generic *)translate; +} + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * + * This is probably needed/dupliocated elsewhere, eg format + * conversion, texture sampling etc. + */ +#define ATTRIB( NAME, SZ, TYPE, FROM, TO ) \ +static void \ +fetch_##NAME(const void *ptr, float *attrib) \ +{ \ + const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f }; \ + unsigned i; \ + \ + for (i = 0; i < SZ; i++) { \ + attrib[i] = FROM(i); \ + } \ + \ + for (; i < 4; i++) { \ + attrib[i] = defaults[i]; \ + } \ +} \ + \ +static void \ +emit_##NAME(const float *attrib, void *ptr) \ +{ \ + unsigned i; \ + TYPE *out = (TYPE *)ptr; \ + \ + for (i = 0; i < SZ; i++) { \ + out[i] = TO(attrib[i]); \ + } \ +} + + +#define FROM_64_FLOAT(i) ((float) ((double *) ptr)[i]) +#define FROM_32_FLOAT(i) (((float *) ptr)[i]) + +#define FROM_8_USCALED(i) ((float) ((unsigned char *) ptr)[i]) +#define FROM_16_USCALED(i) ((float) ((unsigned short *) ptr)[i]) +#define FROM_32_USCALED(i) ((float) ((unsigned int *) ptr)[i]) + +#define FROM_8_SSCALED(i) ((float) ((char *) ptr)[i]) +#define FROM_16_SSCALED(i) ((float) ((short *) ptr)[i]) +#define FROM_32_SSCALED(i) ((float) ((int *) ptr)[i]) + +#define FROM_8_UNORM(i) ((float) ((unsigned char *) ptr)[i] / 255.0f) +#define FROM_16_UNORM(i) ((float) ((unsigned short *) ptr)[i] / 65535.0f) +#define FROM_32_UNORM(i) ((float) ((unsigned int *) ptr)[i] / 4294967295.0f) + +#define FROM_8_SNORM(i) ((float) ((char *) ptr)[i] / 127.0f) +#define FROM_16_SNORM(i) ((float) ((short *) ptr)[i] / 32767.0f) +#define FROM_32_SNORM(i) ((float) ((int *) ptr)[i] / 2147483647.0f) + +#define TO_64_FLOAT(x) ((double) x) +#define TO_32_FLOAT(x) (x) + +#define TO_8_USCALED(x) ((unsigned char) x) +#define TO_16_USCALED(x) ((unsigned short) x) +#define TO_32_USCALED(x) ((unsigned int) x) + +#define TO_8_SSCALED(x) ((char) x) +#define TO_16_SSCALED(x) ((short) x) +#define TO_32_SSCALED(x) ((int) x) + +#define TO_8_UNORM(x) ((unsigned char) (x * 255.0f)) +#define TO_16_UNORM(x) ((unsigned short) (x * 65535.0f)) +#define TO_32_UNORM(x) ((unsigned int) (x * 4294967295.0f)) + +#define TO_8_SNORM(x) ((char) (x * 127.0f)) +#define TO_16_SNORM(x) ((short) (x * 32767.0f)) +#define TO_32_SNORM(x) ((int) (x * 2147483647.0f)) + + + +ATTRIB( R64G64B64A64_FLOAT, 4, double, FROM_64_FLOAT, TO_64_FLOAT ) +ATTRIB( R64G64B64_FLOAT, 3, double, FROM_64_FLOAT, TO_64_FLOAT ) +ATTRIB( R64G64_FLOAT, 2, double, FROM_64_FLOAT, TO_64_FLOAT ) +ATTRIB( R64_FLOAT, 1, double, FROM_64_FLOAT, TO_64_FLOAT ) + +ATTRIB( R32G32B32A32_FLOAT, 4, float, FROM_32_FLOAT, TO_32_FLOAT ) +ATTRIB( R32G32B32_FLOAT, 3, float, FROM_32_FLOAT, TO_32_FLOAT ) +ATTRIB( R32G32_FLOAT, 2, float, FROM_32_FLOAT, TO_32_FLOAT ) +ATTRIB( R32_FLOAT, 1, float, FROM_32_FLOAT, TO_32_FLOAT ) + +ATTRIB( R32G32B32A32_USCALED, 4, unsigned, FROM_32_USCALED, TO_32_USCALED ) +ATTRIB( R32G32B32_USCALED, 3, unsigned, FROM_32_USCALED, TO_32_USCALED ) +ATTRIB( R32G32_USCALED, 2, unsigned, FROM_32_USCALED, TO_32_USCALED ) +ATTRIB( R32_USCALED, 1, unsigned, FROM_32_USCALED, TO_32_USCALED ) + +ATTRIB( R32G32B32A32_SSCALED, 4, int, FROM_32_SSCALED, TO_32_SSCALED ) +ATTRIB( R32G32B32_SSCALED, 3, int, FROM_32_SSCALED, TO_32_SSCALED ) +ATTRIB( R32G32_SSCALED, 2, int, FROM_32_SSCALED, TO_32_SSCALED ) +ATTRIB( R32_SSCALED, 1, int, FROM_32_SSCALED, TO_32_SSCALED ) + +ATTRIB( R32G32B32A32_UNORM, 4, unsigned, FROM_32_UNORM, TO_32_UNORM ) +ATTRIB( R32G32B32_UNORM, 3, unsigned, FROM_32_UNORM, TO_32_UNORM ) +ATTRIB( R32G32_UNORM, 2, unsigned, FROM_32_UNORM, TO_32_UNORM ) +ATTRIB( R32_UNORM, 1, unsigned, FROM_32_UNORM, TO_32_UNORM ) + +ATTRIB( R32G32B32A32_SNORM, 4, int, FROM_32_SNORM, TO_32_SNORM ) +ATTRIB( R32G32B32_SNORM, 3, int, FROM_32_SNORM, TO_32_SNORM ) +ATTRIB( R32G32_SNORM, 2, int, FROM_32_SNORM, TO_32_SNORM ) +ATTRIB( R32_SNORM, 1, int, FROM_32_SNORM, TO_32_SNORM ) + +ATTRIB( R16G16B16A16_USCALED, 4, ushort, FROM_16_USCALED, TO_16_USCALED ) +ATTRIB( R16G16B16_USCALED, 3, ushort, FROM_16_USCALED, TO_16_USCALED ) +ATTRIB( R16G16_USCALED, 2, ushort, FROM_16_USCALED, TO_16_USCALED ) +ATTRIB( R16_USCALED, 1, ushort, FROM_16_USCALED, TO_16_USCALED ) + +ATTRIB( R16G16B16A16_SSCALED, 4, short, FROM_16_SSCALED, TO_16_SSCALED ) +ATTRIB( R16G16B16_SSCALED, 3, short, FROM_16_SSCALED, TO_16_SSCALED ) +ATTRIB( R16G16_SSCALED, 2, short, FROM_16_SSCALED, TO_16_SSCALED ) +ATTRIB( R16_SSCALED, 1, short, FROM_16_SSCALED, TO_16_SSCALED ) + +ATTRIB( R16G16B16A16_UNORM, 4, ushort, FROM_16_UNORM, TO_16_UNORM ) +ATTRIB( R16G16B16_UNORM, 3, ushort, FROM_16_UNORM, TO_16_UNORM ) +ATTRIB( R16G16_UNORM, 2, ushort, FROM_16_UNORM, TO_16_UNORM ) +ATTRIB( R16_UNORM, 1, ushort, FROM_16_UNORM, TO_16_UNORM ) + +ATTRIB( R16G16B16A16_SNORM, 4, short, FROM_16_SNORM, TO_16_SNORM ) +ATTRIB( R16G16B16_SNORM, 3, short, FROM_16_SNORM, TO_16_SNORM ) +ATTRIB( R16G16_SNORM, 2, short, FROM_16_SNORM, TO_16_SNORM ) +ATTRIB( R16_SNORM, 1, short, FROM_16_SNORM, TO_16_SNORM ) + +ATTRIB( R8G8B8A8_USCALED, 4, ubyte, FROM_8_USCALED, TO_8_USCALED ) +ATTRIB( R8G8B8_USCALED, 3, ubyte, FROM_8_USCALED, TO_8_USCALED ) +ATTRIB( R8G8_USCALED, 2, ubyte, FROM_8_USCALED, TO_8_USCALED ) +ATTRIB( R8_USCALED, 1, ubyte, FROM_8_USCALED, TO_8_USCALED ) + +ATTRIB( R8G8B8A8_SSCALED, 4, char, FROM_8_SSCALED, TO_8_SSCALED ) +ATTRIB( R8G8B8_SSCALED, 3, char, FROM_8_SSCALED, TO_8_SSCALED ) +ATTRIB( R8G8_SSCALED, 2, char, FROM_8_SSCALED, TO_8_SSCALED ) +ATTRIB( R8_SSCALED, 1, char, FROM_8_SSCALED, TO_8_SSCALED ) + +ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) +ATTRIB( R8G8B8_UNORM, 3, ubyte, FROM_8_UNORM, TO_8_UNORM ) +ATTRIB( R8G8_UNORM, 2, ubyte, FROM_8_UNORM, TO_8_UNORM ) +ATTRIB( R8_UNORM, 1, ubyte, FROM_8_UNORM, TO_8_UNORM ) + +ATTRIB( R8G8B8A8_SNORM, 4, char, FROM_8_SNORM, TO_8_SNORM ) +ATTRIB( R8G8B8_SNORM, 3, char, FROM_8_SNORM, TO_8_SNORM ) +ATTRIB( R8G8_SNORM, 2, char, FROM_8_SNORM, TO_8_SNORM ) +ATTRIB( R8_SNORM, 1, char, FROM_8_SNORM, TO_8_SNORM ) + +ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) +//ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) + + + +static void +fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) +{ + attrib[2] = FROM_8_UNORM(0); + attrib[1] = FROM_8_UNORM(1); + attrib[0] = FROM_8_UNORM(2); + attrib[3] = FROM_8_UNORM(3); +} + +static void +emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) +{ + ubyte *out = (ubyte *)ptr; + out[2] = TO_8_UNORM(attrib[0]); + out[1] = TO_8_UNORM(attrib[1]); + out[0] = TO_8_UNORM(attrib[2]); + out[3] = TO_8_UNORM(attrib[3]); +} + +static void +fetch_NULL( const void *ptr, float *attrib ) +{ + attrib[0] = 0; + attrib[1] = 0; + attrib[2] = 0; + attrib[3] = 1; +} + +static void +emit_NULL( const float *attrib, void *ptr ) +{ + /* do nothing is the only sensible option */ +} + +static fetch_func get_fetch_func( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return fetch_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return fetch_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return fetch_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return fetch_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return fetch_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return fetch_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return fetch_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return fetch_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return fetch_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return fetch_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return fetch_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return fetch_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return fetch_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return fetch_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return fetch_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return fetch_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return fetch_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return fetch_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return fetch_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return fetch_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return fetch_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return fetch_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return fetch_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return fetch_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return fetch_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return fetch_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return fetch_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return fetch_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return fetch_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return fetch_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return fetch_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return fetch_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return fetch_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return fetch_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return fetch_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return fetch_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return fetch_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return fetch_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return fetch_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return fetch_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return fetch_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return fetch_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return fetch_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return fetch_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return fetch_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return fetch_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return fetch_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return fetch_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return fetch_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return fetch_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return fetch_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return fetch_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return fetch_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return fetch_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return fetch_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return fetch_A8R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return fetch_B8G8R8A8_UNORM; + + default: + assert(0); + return fetch_NULL; + } +} + + + + +static emit_func get_emit_func( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return emit_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return emit_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return emit_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return emit_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return emit_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return emit_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return emit_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return emit_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return emit_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return emit_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return emit_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return emit_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return emit_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return emit_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return emit_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return emit_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return emit_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return emit_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return emit_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return emit_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return emit_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return emit_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return emit_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return emit_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return emit_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return emit_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return emit_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return emit_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return emit_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return emit_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return emit_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return emit_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return emit_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return emit_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return emit_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return emit_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return emit_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return emit_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return emit_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return emit_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return emit_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return emit_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return emit_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return emit_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return emit_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return emit_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return emit_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return emit_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return emit_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return emit_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return emit_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return emit_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return emit_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return emit_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return emit_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return emit_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return emit_A8R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return emit_B8G8R8A8_UNORM; + + default: + assert(0); + return emit_NULL; + } +} + + + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void generic_run_elts( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ) +{ + struct translate_generic *tg = translate_generic(translate); + char *vert = output_buffer; + unsigned nr_attrs = tg->nr_attrib; + unsigned attr; + unsigned i; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = *elts++; + + for (attr = 0; attr < nr_attrs; attr++) { + float data[4]; + + const char *src = (tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt); + + char *dst = (vert + + tg->attrib[attr].output_offset); + + tg->attrib[attr].fetch( src, data ); + + if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", + i, elt, attr, data[0], data[1], data[2], data[3]); + + tg->attrib[attr].emit( data, dst ); + } + + vert += tg->translate.key.output_stride; + } +} + + + +static void generic_run( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct translate_generic *tg = translate_generic(translate); + char *vert = output_buffer; + unsigned nr_attrs = tg->nr_attrib; + unsigned attr; + unsigned i; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = start + i; + + for (attr = 0; attr < nr_attrs; attr++) { + float data[4]; + + const char *src = (tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt); + + char *dst = (vert + + tg->attrib[attr].output_offset); + + tg->attrib[attr].fetch( src, data ); + + if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", + i, attr, data[0], data[1], data[2], data[3]); + + tg->attrib[attr].emit( data, dst ); + } + + vert += tg->translate.key.output_stride; + } +} + + + +static void generic_set_buffer( struct translate *translate, + unsigned buf, + const void *ptr, + unsigned stride ) +{ + struct translate_generic *tg = translate_generic(translate); + unsigned i; + + for (i = 0; i < tg->nr_attrib; i++) { + if (tg->attrib[i].buffer == buf) { + tg->attrib[i].input_ptr = ((char *)ptr + + tg->attrib[i].input_offset); + tg->attrib[i].input_stride = stride; + } + } +} + + +static void generic_release( struct translate *translate ) +{ + /* Refcount? + */ + FREE(translate); +} + +struct translate *translate_generic_create( const struct translate_key *key ) +{ + struct translate_generic *tg = CALLOC_STRUCT(translate_generic); + unsigned i; + + if (tg == NULL) + return NULL; + + tg->translate.key = *key; + tg->translate.release = generic_release; + tg->translate.set_buffer = generic_set_buffer; + tg->translate.run_elts = generic_run_elts; + tg->translate.run = generic_run; + + for (i = 0; i < key->nr_elements; i++) { + + tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); + tg->attrib[i].buffer = key->element[i].input_buffer; + tg->attrib[i].input_offset = key->element[i].input_offset; + + tg->attrib[i].emit = get_emit_func(key->element[i].output_format); + tg->attrib[i].output_offset = key->element[i].output_offset; + + } + + tg->nr_attrib = key->nr_elements; + + + return &tg->translate; +} diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c new file mode 100644 index 0000000000..a54ac5a82f --- /dev/null +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -0,0 +1,625 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keithw@tungstengraphics.com> + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "util/u_simple_list.h" + +#include "translate.h" + + +#if defined(__i386__) || defined(__386__) || defined(i386) + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_x86sse.h" + + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + + +#ifdef WIN32 +#define RTASM __cdecl +#else +#define RTASM +#endif + +typedef void (RTASM *run_func)( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ); + +typedef void (RTASM *run_elts_func)( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ); + + + +struct translate_sse { + struct translate translate; + + struct x86_function linear_func; + struct x86_function elt_func; + struct x86_function *func; + + boolean loaded_identity; + boolean loaded_255; + boolean loaded_inv_255; + + float identity[4]; + float float_255[4]; + float inv_255[4]; + + struct { + char *input_ptr; + unsigned input_stride; + } attrib[PIPE_MAX_ATTRIBS]; + + run_func gen_run; + run_elts_func gen_run_elts; + +}; + +static int get_offset( const void *a, const void *b ) +{ + return (const char *)b - (const char *)a; +} + + + +static struct x86_reg get_identity( struct translate_sse *p ) +{ + struct x86_reg reg = x86_make_reg(file_XMM, 6); + + if (!p->loaded_identity) { + /* Nasty: + */ + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + + p->loaded_identity = TRUE; + p->identity[0] = 0; + p->identity[1] = 0; + p->identity[2] = 0; + p->identity[3] = 1; + + sse_movups(p->func, reg, + x86_make_disp(translateESI, + get_offset(p, &p->identity[0]))); + } + + return reg; +} + +static struct x86_reg get_255( struct translate_sse *p ) +{ + struct x86_reg reg = x86_make_reg(file_XMM, 6); + + if (!p->loaded_255) { + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + + p->loaded_255 = TRUE; + p->float_255[0] = + p->float_255[1] = + p->float_255[2] = + p->float_255[3] = 255.0f; + + sse_movups(p->func, reg, + x86_make_disp(translateESI, + get_offset(p, &p->float_255[0]))); + } + + return reg; + return x86_make_reg(file_XMM, 7); +} + +static struct x86_reg get_inv_255( struct translate_sse *p ) +{ + struct x86_reg reg = x86_make_reg(file_XMM, 5); + + if (!p->loaded_inv_255) { + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + + p->loaded_inv_255 = TRUE; + p->inv_255[0] = + p->inv_255[1] = + p->inv_255[2] = + p->inv_255[3] = 1.0f / 255.0f; + + sse_movups(p->func, reg, + x86_make_disp(translateESI, + get_offset(p, &p->inv_255[0]))); + } + + return reg; +} + + +static void emit_load_R32G32B32A32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + sse_movups(p->func, data, arg0); +} + +static void emit_load_R32G32B32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + /* Have to jump through some hoops: + * + * c 0 0 0 + * c 0 0 1 + * 0 0 c 1 + * a b c 1 + */ + sse_movss(p->func, data, x86_make_disp(arg0, 8)); + sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) ); + sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) ); + sse_movlps(p->func, data, arg0); +} + +static void emit_load_R32G32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + /* 0 0 0 1 + * a b 0 1 + */ + sse_movups(p->func, data, get_identity(p) ); + sse_movlps(p->func, data, arg0); +} + + +static void emit_load_R32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + /* a 0 0 0 + * a 0 0 1 + */ + sse_movss(p->func, data, arg0); + sse_orps(p->func, data, get_identity(p) ); +} + + +static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p, + struct x86_reg data, + struct x86_reg src ) +{ + + /* Load and unpack twice: + */ + sse_movss(p->func, data, src); + sse2_punpcklbw(p->func, data, get_identity(p)); + sse2_punpcklbw(p->func, data, get_identity(p)); + + /* Convert to float: + */ + sse2_cvtdq2ps(p->func, data, data); + + + /* Scale by 1/255.0 + */ + sse_mulps(p->func, data, get_inv_255(p)); +} + + + + +static void emit_store_R32G32B32A32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + sse_movups(p->func, dest, dataXMM); +} + +static void emit_store_R32G32B32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + /* Emit two, shuffle, emit one. + */ + sse_movlps(p->func, dest, dataXMM); + sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ + sse_movss(p->func, x86_make_disp(dest,8), dataXMM); +} + +static void emit_store_R32G32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + sse_movlps(p->func, dest, dataXMM); +} + +static void emit_store_R32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + sse_movss(p->func, dest, dataXMM); +} + + + +static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + /* Scale by 255.0 + */ + sse_mulps(p->func, dataXMM, get_255(p)); + + /* Pack and emit: + */ + sse2_cvtps2dq(p->func, dataXMM, dataXMM); + sse2_packssdw(p->func, dataXMM, dataXMM); + sse2_packuswb(p->func, dataXMM, dataXMM); + sse_movss(p->func, dest, dataXMM); +} + + + + + +static void get_src_ptr( struct translate_sse *p, + struct x86_reg srcEAX, + struct x86_reg translateREG, + struct x86_reg eltREG, + unsigned a ) +{ + struct x86_reg input_ptr = + x86_make_disp(translateREG, + get_offset(p, &p->attrib[a].input_ptr)); + + struct x86_reg input_stride = + x86_make_disp(translateREG, + get_offset(p, &p->attrib[a].input_stride)); + + /* Calculate pointer to current attrib: + */ + x86_mov(p->func, srcEAX, input_stride); + x86_imul(p->func, srcEAX, eltREG); + x86_add(p->func, srcEAX, input_ptr); +} + + +/* Extended swizzles? Maybe later. + */ +static void emit_swizzle( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg src, + unsigned shuffle ) +{ + sse_shufps(p->func, dest, src, shuffle); +} + + +static boolean translate_attr( struct translate_sse *p, + const struct translate_element *a, + struct x86_reg srcECX, + struct x86_reg dstEAX) +{ + struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); + + switch (a->input_format) { + case PIPE_FORMAT_R32_FLOAT: + emit_load_R32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_R32G32_FLOAT: + emit_load_R32G32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + emit_load_R32G32B32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + emit_load_R32G32B32A32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); + emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); + break; + default: + return FALSE; + } + + switch (a->output_format) { + case PIPE_FORMAT_R32_FLOAT: + emit_store_R32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R32G32_FLOAT: + emit_store_R32G32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + emit_store_R32G32B32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + emit_store_R32G32B32A32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); + break; + default: + return FALSE; + } + + return TRUE; +} + +/* Build run( struct translate *translate, + * unsigned start, + * unsigned count, + * void *output_buffer ) + * or + * run_elts( struct translate *translate, + * unsigned *elts, + * unsigned count, + * void *output_buffer ) + * + * Lots of hardcoding + * + * EAX -- pointer to current output vertex + * ECX -- pointer to current attribute + * + */ +static boolean build_vertex_emit( struct translate_sse *p, + struct x86_function *func, + boolean linear ) +{ + struct x86_reg vertexECX = x86_make_reg(file_REG32, reg_AX); + struct x86_reg idxEBX = x86_make_reg(file_REG32, reg_BX); + struct x86_reg srcEAX = x86_make_reg(file_REG32, reg_CX); + struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + int fixup, label; + unsigned j; + + p->func = func; + p->loaded_inv_255 = FALSE; + p->loaded_255 = FALSE; + p->loaded_identity = FALSE; + + x86_init_func(p->func); + + /* Push a few regs? + */ + x86_push(p->func, countEBP); + x86_push(p->func, translateESI); + x86_push(p->func, idxEBX); + + /* Get vertex count, compare to zero + */ + x86_xor(p->func, idxEBX, idxEBX); + x86_mov(p->func, countEBP, x86_fn_arg(p->func, 3)); + x86_cmp(p->func, countEBP, idxEBX); + fixup = x86_jcc_forward(p->func, cc_E); + + /* If linear, idx is the current element, otherwise it is a pointer + * to the current element. + */ + x86_mov(p->func, idxEBX, x86_fn_arg(p->func, 2)); + + /* Initialize destination register. + */ + x86_mov(p->func, vertexECX, x86_fn_arg(p->func, 4)); + + /* Move argument 1 (translate_sse pointer) into a reg: + */ + x86_mov(p->func, translateESI, x86_fn_arg(p->func, 1)); + + + /* always load, needed or not: + */ + + /* Note address for loop jump */ + label = x86_get_label(p->func); + + + for (j = 0; j < p->translate.key.nr_elements; j++) { + const struct translate_element *a = &p->translate.key.element[j]; + + struct x86_reg destEAX = x86_make_disp(vertexECX, + a->output_offset); + + /* Figure out source pointer address: + */ + if (linear) { + get_src_ptr(p, srcEAX, translateESI, idxEBX, j); + } + else { + get_src_ptr(p, srcEAX, translateESI, x86_deref(idxEBX), j); + } + + if (!translate_attr( p, a, x86_deref(srcEAX), destEAX )) + return FALSE; + } + + /* Next vertex: + */ + x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride)); + + /* Incr index + */ /* Emit code for each of the attributes. Currently routes + * everything through SSE registers, even when it might be more + * efficient to stick with regular old x86. No optimization or + * other tricks - enough new ground to cover here just getting + * things working. + */ + + if (linear) { + x86_inc(p->func, idxEBX); + } + else { + x86_lea(p->func, idxEBX, x86_make_disp(idxEBX, 4)); + } + + /* decr count, loop if not zero + */ + x86_dec(p->func, countEBP); + x86_test(p->func, countEBP, countEBP); + x86_jcc(p->func, cc_NZ, label); + + /* Exit mmx state? + */ + if (p->func->need_emms) + mmx_emms(p->func); + + /* Land forward jump here: + */ + x86_fixup_fwd_jump(p->func, fixup); + + /* Pop regs and return + */ + + x86_pop(p->func, idxEBX); + x86_pop(p->func, translateESI); + x86_pop(p->func, countEBP); + x86_ret(p->func); + + return TRUE; +} + + + + + + + +static void translate_sse_set_buffer( struct translate *translate, + unsigned buf, + const void *ptr, + unsigned stride ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + unsigned i; + + for (i = 0; i < p->translate.key.nr_elements; i++) { + if (p->translate.key.element[i].input_buffer == buf) { + p->attrib[i].input_ptr = ((char *)ptr + + p->translate.key.element[i].input_offset); + p->attrib[i].input_stride = stride; + } + } +} + + +static void translate_sse_release( struct translate *translate ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + + x86_release_func( &p->linear_func ); + x86_release_func( &p->elt_func ); + + FREE(p); +} + +static void translate_sse_run_elts( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + + p->gen_run_elts( translate, + elts, + count, + output_buffer ); +} + +static void translate_sse_run( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + + p->gen_run( translate, + start, + count, + output_buffer ); +} + + +struct translate *translate_sse2_create( const struct translate_key *key ) +{ + struct translate_sse *p = NULL; + + if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) + goto fail; + + p = CALLOC_STRUCT( translate_sse ); + if (p == NULL) + goto fail; + + p->translate.key = *key; + p->translate.release = translate_sse_release; + p->translate.set_buffer = translate_sse_set_buffer; + p->translate.run_elts = translate_sse_run_elts; + p->translate.run = translate_sse_run; + + if (!build_vertex_emit(p, &p->linear_func, TRUE)) + goto fail; + + if (!build_vertex_emit(p, &p->elt_func, FALSE)) + goto fail; + + p->gen_run = (run_func)x86_get_func(&p->linear_func); + if (p->gen_run == NULL) + goto fail; + + p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func); + if (p->gen_run_elts == NULL) + goto fail; + + return &p->translate; + + fail: + if (p) + translate_sse_release( &p->translate ); + + return NULL; +} + + + +#else + +void translate_create_sse( const struct translate_key *key ) +{ + return NULL; +} + +#endif diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 9b6c2708b6..05bc43131a 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -14,7 +14,9 @@ C_SOURCES = \ u_hash_table.c \ u_mm.c \ u_simple_shaders.c \ - u_snprintf.c + u_snprintf.c \ + u_time.c \ + u_mm.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 9b3929eb2d..d55d2c7081 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -15,6 +15,7 @@ util = env.ConvenienceLibrary( 'u_mm.c', 'u_simple_shaders.c', 'u_snprintf.c', + 'u_time.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 090e3b7794..4ec1746662 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -26,9 +26,11 @@ **************************************************************************/ +#include "pipe/p_config.h" + #include <stdarg.h> -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY #include <windows.h> #include <winddi.h> #else @@ -39,9 +41,11 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "pipe/p_format.h" +#include "util/u_string.h" -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY static INLINE void _EngDebugPrint(const char *format, ...) { @@ -55,13 +59,18 @@ _EngDebugPrint(const char *format, ...) void _debug_vprintf(const char *format, va_list ap) { -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY #ifndef WINCE /* EngDebugPrint does not handle float point arguments, so we need to use - * our own vsnprintf implementation */ - char buf[512 + 1]; - vsnprintf(buf, sizeof(buf), format, ap); - _EngDebugPrint("%s", buf); + * our own vsnprintf implementation. It is also very slow, so buffer until + * we find a newline. */ + static char buf[512 + 1] = {'\0'}; + size_t len = strlen(buf); + int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); + if(ret > (int)(sizeof(buf) - len - 1) || strchr(buf + len, '\n')) { + _EngDebugPrint("%s", buf); + buf[0] = '\0'; + } #else /* TODO: Implement debug print for WINCE */ #endif @@ -95,7 +104,7 @@ void _debug_break(void) __asm("int3"); #elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) _asm {int 3}; -#elif defined(WIN32) && !defined(WINCE) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) EngDebugBreak(); #else abort(); @@ -103,7 +112,7 @@ void _debug_break(void) } -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY static const char * find(const char *start, const char *end, char c) { @@ -144,7 +153,7 @@ const char * debug_get_option(const char *name, const char *dfault) { const char *result; -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY ULONG_PTR iFile = 0; const void *pMap = NULL; const char *sol, *eol, *sep; @@ -194,6 +203,8 @@ debug_get_bool_option(const char *name, boolean dfault) if(str == NULL) result = dfault; + else if(!strcmp(str, "n")) + result = FALSE; else if(!strcmp(str, "no")) result = FALSE; else if(!strcmp(str, "0")) @@ -245,57 +256,16 @@ debug_get_flags_option(const char *name, } -#if defined(WIN32) -ULONG_PTR debug_config_file = 0; -void *mapped_config_file = 0; - -enum { - eAssertAbortEn = 0x1, -}; - -/* Check for aborts enabled. */ -static unsigned abort_en(void) -{ - if (!mapped_config_file) - { - /* Open an 8 byte file for configuration data. */ - mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file); - } - - /* A value of "0" (ascii) in the configuration file will clear the - * first 8 bits in the test byte. - * - * A value of "1" (ascii) in the configuration file will set the - * first bit in the test byte. - * - * A value of "2" (ascii) in the configuration file will set the - * second bit in the test byte. - * - * Currently the only interesting values are 0 and 1, which clear - * and set abort-on-assert behaviour respectively. - */ - return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn; -} -#else /* WIN32 */ -static unsigned abort_en(void) -{ - return !GETENV("GALLIUM_ABORT_ON_ASSERT"); -} -#endif - void _debug_assert_fail(const char *expr, const char *file, unsigned line, const char *function) { _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr); - if (abort_en()) - { + if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", TRUE)) debug_break(); - } else - { + else _debug_printf("continuing...\n"); - } } @@ -311,7 +281,7 @@ debug_dump_enum(const struct debug_named_value *names, ++names; } - snprintf(rest, sizeof(rest), "0x%08lx", value); + util_snprintf(rest, sizeof(rest), "0x%08lx", value); return rest; } @@ -344,7 +314,7 @@ debug_dump_flags(const struct debug_named_value *names, else first = 0; - snprintf(rest, sizeof(rest), "0x%08lx", value); + util_snprintf(rest, sizeof(rest), "0x%08lx", value); strncat(output, rest, sizeof(output)); } @@ -355,3 +325,101 @@ debug_dump_flags(const struct debug_named_value *names, } + + + + +char *pf_sprint_name( char *str, enum pipe_format format ) +{ + strcpy( str, "PIPE_FORMAT_" ); + switch (pf_layout( format )) { + case PIPE_FORMAT_LAYOUT_RGBAZS: + { + pipe_format_rgbazs_t rgbazs = (pipe_format_rgbazs_t) format; + uint i; + uint scale = 1 << (pf_exp8( rgbazs ) * 3); + + for (i = 0; i < 4; i++) { + uint size = pf_size_xyzw( rgbazs, i ); + + if (size == 0) { + break; + } + switch (pf_swizzle_xyzw( rgbazs, i )) { + case PIPE_FORMAT_COMP_R: + strcat( str, "R" ); + break; + case PIPE_FORMAT_COMP_G: + strcat( str, "G" ); + break; + case PIPE_FORMAT_COMP_B: + strcat( str, "B" ); + break; + case PIPE_FORMAT_COMP_A: + strcat( str, "A" ); + break; + case PIPE_FORMAT_COMP_0: + strcat( str, "0" ); + break; + case PIPE_FORMAT_COMP_1: + strcat( str, "1" ); + break; + case PIPE_FORMAT_COMP_Z: + strcat( str, "Z" ); + break; + case PIPE_FORMAT_COMP_S: + strcat( str, "S" ); + break; + } + util_snprintf( &str[strlen( str )], 32, "%u", size * scale ); + } + if (i != 0) { + strcat( str, "_" ); + } + switch (pf_type( rgbazs )) { + case PIPE_FORMAT_TYPE_UNKNOWN: + strcat( str, "NONE" ); + break; + case PIPE_FORMAT_TYPE_FLOAT: + strcat( str, "FLOAT" ); + break; + case PIPE_FORMAT_TYPE_UNORM: + strcat( str, "UNORM" ); + break; + case PIPE_FORMAT_TYPE_SNORM: + strcat( str, "SNORM" ); + break; + case PIPE_FORMAT_TYPE_USCALED: + strcat( str, "USCALED" ); + break; + case PIPE_FORMAT_TYPE_SSCALED: + strcat( str, "SSCALED" ); + break; + } + } + break; + case PIPE_FORMAT_LAYOUT_YCBCR: + { + pipe_format_ycbcr_t ycbcr = (pipe_format_ycbcr_t) format; + + strcat( str, "YCBCR" ); + if (pf_rev( ycbcr )) { + strcat( str, "_REV" ); + } + } + break; + } + return str; +} + + +#ifdef DEBUG +void debug_print_format(const char *msg, unsigned fmt ) +{ + char fmtstr[80]; + + pf_sprint_name(fmtstr, (enum pipe_format)fmt); + + debug_printf("%s: %s\n", msg, fmtstr); +} +#endif diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index c160afe5b7..3b5e4fbaee 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -32,9 +32,13 @@ * @author José Fonseca <jrfonseca@tungstengraphics.com> */ -#ifdef WIN32 +#include "pipe/p_config.h" + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #include <windows.h> #include <winddi.h> +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#include <wdm.h> #else #include <stdio.h> #include <stdlib.h> @@ -47,9 +51,12 @@ #define DEBUG_MEMORY_MAGIC 0x6e34090aU -#if defined(WIN32) && !defined(WINCE) +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE) #define real_malloc(_size) EngAllocMem(0, _size, 'D3AG') #define real_free(_ptr) EngFreeMem(_ptr) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#define real_malloc(_size) ExAllocatePool(0, _size) +#define real_free(_ptr) ExFreePool(_ptr) #else #define real_malloc(_size) malloc(_size) #define real_free(_ptr) free(_ptr) @@ -70,8 +77,26 @@ struct debug_memory_header static struct list_head list = { &list, &list }; -static unsigned long start_no = 0; -static unsigned long end_no = 0; +static unsigned long last_no = 0; + + +static INLINE struct debug_memory_header * +header_from_data(void *data) +{ + if(data) + return (struct debug_memory_header *)((char *)data - sizeof(struct debug_memory_header)); + else + return NULL; +} + +static INLINE void * +data_from_header(struct debug_memory_header *hdr) +{ + if(hdr) + return (void *)((char *)hdr + sizeof(struct debug_memory_header)); + else + return NULL; +} void * @@ -84,7 +109,7 @@ debug_malloc(const char *file, unsigned line, const char *function, if(!hdr) return NULL; - hdr->no = end_no++; + hdr->no = last_no++; hdr->file = file; hdr->line = line; hdr->function = function; @@ -93,7 +118,7 @@ debug_malloc(const char *file, unsigned line, const char *function, LIST_ADDTAIL(&hdr->head, &list); - return (void *)((char *)hdr + sizeof(*hdr)); + return data_from_header(hdr); } void @@ -105,7 +130,7 @@ debug_free(const char *file, unsigned line, const char *function, if(!ptr) return; - hdr = (struct debug_memory_header *)((char *)ptr - sizeof(*hdr)); + hdr = header_from_data(ptr); if(hdr->magic != DEBUG_MEMORY_MAGIC) { debug_printf("%s:%u:%s: freeing bad or corrupted memory %p\n", file, line, function, @@ -134,27 +159,57 @@ void * debug_realloc(const char *file, unsigned line, const char *function, void *old_ptr, size_t old_size, size_t new_size ) { - void *new_ptr = NULL; - - if (new_size != 0) { - new_ptr = debug_malloc( file, line, function, new_size ); - - if( new_ptr && old_ptr ) - memcpy( new_ptr, old_ptr, old_size ); + struct debug_memory_header *old_hdr, *new_hdr; + void *new_ptr; + + if(!old_ptr) + return debug_malloc( file, line, function, new_size ); + + if(!new_size) { + debug_free( file, line, function, old_ptr ); + return NULL; + } + + old_hdr = header_from_data(old_ptr); + if(old_hdr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: reallocating bad or corrupted memory %p\n", + file, line, function, + old_ptr); + debug_assert(0); + return NULL; } + + /* alloc new */ + new_hdr = real_malloc(sizeof(*new_hdr) + new_size); + if(!new_hdr) + return NULL; + new_hdr->no = old_hdr->no; + new_hdr->file = old_hdr->file; + new_hdr->line = old_hdr->line; + new_hdr->function = old_hdr->function; + new_hdr->size = new_size; + new_hdr->magic = DEBUG_MEMORY_MAGIC; + LIST_REPLACE(&old_hdr->head, &new_hdr->head); + + /* copy data */ + new_ptr = data_from_header(new_hdr); + memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size ); + + /* free old */ + old_hdr->magic = 0; + real_free(old_hdr); - debug_free( file, line, function, old_ptr ); return new_ptr; } -void -debug_memory_reset(void) +unsigned long +debug_memory_begin(void) { - start_no = end_no; + return last_no; } void -debug_memory_report(void) +debug_memory_end(unsigned long start_no) { struct list_head *entry; @@ -163,8 +218,9 @@ debug_memory_report(void) struct debug_memory_header *hdr; void *ptr; hdr = LIST_ENTRY(struct debug_memory_header, entry, head); - ptr = (void *)((char *)hdr + sizeof(*hdr)); - if(hdr->no >= start_no) + ptr = data_from_header(hdr); + if(start_no <= hdr->no && hdr->no < last_no || + last_no < start_no && (hdr->no < last_no || start_no <= hdr->no)) debug_printf("%s:%u:%s: %u bytes at %p not freed\n", hdr->file, hdr->line, hdr->function, hdr->size, ptr); diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 520da5cecd..63e1cc6013 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -169,6 +169,52 @@ a8r8g8b8_put_tile_rgba(unsigned *dst, } +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +x8r8g8b8_get_tile_rgba(unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff); + pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff); + pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff); + pRow[3] = UBYTE_TO_FLOAT(0xff); + } + p += dst_stride; + } +} + + +static void +x8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b; + UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); + UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); + UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); + *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + /*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ static void @@ -339,7 +385,7 @@ z16_get_tile_rgba(ushort *src, -/*** PIPE_FORMAT_U_L8 ***/ +/*** PIPE_FORMAT_L8_UNORM ***/ static void l8_get_tile_rgba(ubyte *src, @@ -362,7 +408,7 @@ l8_get_tile_rgba(ubyte *src, } -/*** PIPE_FORMAT_U_A8 ***/ +/*** PIPE_FORMAT_A8_UNORM ***/ static void a8_get_tile_rgba(ubyte *src, @@ -430,7 +476,7 @@ r16g16b16a16_put_tile_rgba(short *dst, -/*** PIPE_FORMAT_U_I8 ***/ +/*** PIPE_FORMAT_I8_UNORM ***/ static void i8_get_tile_rgba(ubyte *src, @@ -453,7 +499,7 @@ i8_get_tile_rgba(ubyte *src, } -/*** PIPE_FORMAT_U_A8_L8 ***/ +/*** PIPE_FORMAT_A8L8_UNORM ***/ static void a8_l8_get_tile_rgba(ushort *src, @@ -647,6 +693,9 @@ pipe_get_tile_rgba(struct pipe_context *pipe, case PIPE_FORMAT_A8R8G8B8_UNORM: a8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + break; case PIPE_FORMAT_B8G8R8A8_UNORM: b8g8r8a8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); break; @@ -659,16 +708,16 @@ pipe_get_tile_rgba(struct pipe_context *pipe, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: l8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: a8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: i8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: a8_l8_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); break; case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -681,6 +730,7 @@ pipe_get_tile_rgba(struct pipe_context *pipe, z32_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); break; case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: s8z24_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); break; case PIPE_FORMAT_Z24S8_UNORM: @@ -723,6 +773,9 @@ pipe_put_tile_rgba(struct pipe_context *pipe, case PIPE_FORMAT_A8R8G8B8_UNORM: a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; case PIPE_FORMAT_B8G8R8A8_UNORM: b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; @@ -734,16 +787,16 @@ pipe_put_tile_rgba(struct pipe_context *pipe, break; case PIPE_FORMAT_R8G8B8A8_UNORM: break; - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: /*l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: /*a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: /*i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: /*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -756,6 +809,7 @@ pipe_put_tile_rgba(struct pipe_context *pipe, /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_Z24S8_UNORM: @@ -800,6 +854,7 @@ pipe_get_tile_z(struct pipe_context *pipe, } break; case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: { const uint *pSrc = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); @@ -860,6 +915,7 @@ pipe_put_tile_z(struct pipe_context *pipe, } break; case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: { uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); for (i = 0; i < h; i++) { diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index eec5e600c9..568d62ced1 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -57,6 +57,7 @@ struct blit_state struct pipe_depth_stencil_alpha_state depthstencil; struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; + struct pipe_viewport_state viewport; struct pipe_shader_state vert_shader; struct pipe_shader_state frag_shader; @@ -100,7 +101,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer)); ctx->rasterizer.front_winding = PIPE_WINDING_CW; ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; - ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + ctx->rasterizer.bypass_clipping = 1; /*ctx->rasterizer.bypass_vs = 1;*/ /* samplers */ @@ -113,8 +114,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->sampler.mag_img_filter = 0; /* set later */ ctx->sampler.normalized_coords = 1; -#if 0 - /* viewport */ + /* viewport (identity, we setup vertices in wincoords) */ ctx->viewport.scale[0] = 1.0; ctx->viewport.scale[1] = 1.0; ctx->viewport.scale[2] = 1.0; @@ -123,7 +123,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->viewport.translate[1] = 0.0; ctx->viewport.translate[2] = 0.0; ctx->viewport.translate[3] = 0.0; -#endif /* vertex shader */ { @@ -265,6 +264,9 @@ util_blit_pixels(struct blit_state *ctx, dstY1 = tmp; } + assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE)); + assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE)); + /* * XXX for now we're always creating a temporary texture. * Strictly speaking that's not always needed. @@ -293,6 +295,8 @@ util_blit_pixels(struct blit_state *ctx, src, srcLeft, srcTop, /* src */ srcW, srcH); /* size */ + pipe->texture_update(pipe, tex, 0, 1 << 0); + /* save state (restored below) */ cso_save_blend(ctx->cso); cso_save_depth_stencil_alpha(ctx->cso); @@ -300,11 +304,15 @@ util_blit_pixels(struct blit_state *ctx, cso_save_samplers(ctx->cso); cso_save_sampler_textures(ctx->cso); cso_save_framebuffer(ctx->cso); + cso_save_fragment_shader(ctx->cso); + cso_save_vertex_shader(ctx->cso); + cso_save_viewport(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_viewport(ctx->cso, &ctx->viewport); /* sampler */ ctx->sampler.min_img_filter = filter; @@ -313,11 +321,11 @@ util_blit_pixels(struct blit_state *ctx, cso_single_sampler_done(ctx->cso); /* texture */ - pipe->set_sampler_textures(pipe, 1, &tex); + cso_set_sampler_textures(ctx->cso, 1, &tex); /* shaders */ - pipe->bind_fs_state(pipe, ctx->fs); - pipe->bind_vs_state(pipe, ctx->vs); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* drawing dest */ memset(&fb, 0, sizeof(fb)); @@ -344,6 +352,9 @@ util_blit_pixels(struct blit_state *ctx, cso_restore_samplers(ctx->cso); cso_restore_sampler_textures(ctx->cso); cso_restore_framebuffer(ctx->cso); + cso_restore_fragment_shader(ctx->cso); + cso_restore_vertex_shader(ctx->cso); + cso_restore_viewport(ctx->cso); /* free the texture */ pipe_surface_reference(&texSurf, NULL); diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index 61f1d9bb32..0ce9732e62 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -30,7 +30,11 @@ #define U_BLIT_H +#ifdef __cplusplus +extern "C" { +#endif + struct pipe_context; struct pipe_surface; struct cso_context; @@ -58,4 +62,8 @@ util_blit_pixels(struct blit_state *ctx, float z, uint filter); +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h index 8cb10c9820..d108d92e52 100644 --- a/src/gallium/auxiliary/util/u_double_list.h +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -70,6 +70,14 @@ struct list_head (__list)->prev = (__item); \ } while(0) +#define LIST_REPLACE(__from, __to) \ + do { \ + (__to)->prev = (__from)->prev; \ + (__to)->next = (__from)->next; \ + (__from)->next->prev = (__to); \ + (__from)->prev->next = (__to); \ + } while (0) + #define LIST_DEL(__item) \ do { \ (__item)->prev->next = (__item)->next; \ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 2fd214d22e..c53c512268 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -61,6 +61,7 @@ struct gen_mipmap_state struct pipe_depth_stencil_alpha_state depthstencil; struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; + struct pipe_viewport_state viewport; struct pipe_shader_state vert_shader; struct pipe_shader_state frag_shader; @@ -474,7 +475,9 @@ format_to_type_comps(enum pipe_format pformat, { switch (pformat) { case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: *datatype = UBYTE; *comps = 4; return; @@ -490,18 +493,21 @@ format_to_type_comps(enum pipe_format pformat, *datatype = USHORT_5_6_5; *comps = 3; return; - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: *datatype = UBYTE; *comps = 1; return; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: *datatype = UBYTE; *comps = 2; return; default: assert(0); + *datatype = UBYTE; + *comps = 0; + break; } } @@ -712,7 +718,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer)); ctx->rasterizer.front_winding = PIPE_WINDING_CW; ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; - ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + ctx->rasterizer.bypass_clipping = 1; /*ctx->rasterizer.bypass_vs = 1;*/ /* sampler state */ @@ -723,9 +729,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; ctx->sampler.normalized_coords = 1; - -#if 0 - /* viewport */ + /* viewport state (identity, verts are in wincoords) */ ctx->viewport.scale[0] = 1.0; ctx->viewport.scale[1] = 1.0; ctx->viewport.scale[2] = 1.0; @@ -734,7 +738,6 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->viewport.translate[1] = 0.0; ctx->viewport.translate[2] = 0.0; ctx->viewport.translate[3] = 0.0; -#endif /* vertex shader */ { @@ -775,23 +778,23 @@ set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) { void *buf; - ctx->vertices[0][0][0] = -0.5f; /*x*/ - ctx->vertices[0][0][1] = -0.5f; /*y*/ + ctx->vertices[0][0][0] = 0.0f; /*x*/ + ctx->vertices[0][0][1] = 0.0f; /*y*/ ctx->vertices[0][1][0] = 0.0f; /*s*/ ctx->vertices[0][1][1] = 0.0f; /*t*/ - ctx->vertices[1][0][0] = width - 0.5f; /*x*/ - ctx->vertices[1][0][1] = -0.5f; /*y*/ - ctx->vertices[1][1][0] = 1.0f; /*s*/ - ctx->vertices[1][1][1] = 0.0f; /*t*/ + ctx->vertices[1][0][0] = width; + ctx->vertices[1][0][1] = 0.0f; + ctx->vertices[1][1][0] = 1.0f; + ctx->vertices[1][1][1] = 0.0f; - ctx->vertices[2][0][0] = width - 0.5f; - ctx->vertices[2][0][1] = height - 0.5f; + ctx->vertices[2][0][0] = width; + ctx->vertices[2][0][1] = height; ctx->vertices[2][1][0] = 1.0f; ctx->vertices[2][1][1] = 1.0f; - ctx->vertices[3][0][0] = -0.5f; - ctx->vertices[3][0][1] = height - 0.5f; + ctx->vertices[3][0][0] = 0.0f; + ctx->vertices[3][0][1] = height; ctx->vertices[3][1][0] = 0.0f; ctx->vertices[3][1][1] = 1.0f; @@ -825,26 +828,6 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) } -#if 0 -static void -simple_viewport(struct pipe_context *pipe, uint width, uint height) -{ - struct pipe_viewport_state vp; - - vp.scale[0] = 0.5 * width; - vp.scale[1] = -0.5 * height; - vp.scale[2] = 1.0; - vp.scale[3] = 1.0; - vp.translate[0] = 0.5 * width; - vp.translate[1] = 0.5 * height; - vp.translate[2] = 0.0; - vp.translate[3] = 0.0; - - pipe->set_viewport_state(pipe, &vp); -} -#endif - - /** * Generate mipmap images. It's assumed all needed texture memory is * already allocated. @@ -880,17 +863,18 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_save_samplers(ctx->cso); cso_save_sampler_textures(ctx->cso); cso_save_framebuffer(ctx->cso); + cso_save_fragment_shader(ctx->cso); + cso_save_vertex_shader(ctx->cso); + cso_save_viewport(ctx->cso); /* bind our state */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_viewport(ctx->cso, &ctx->viewport); - pipe->bind_vs_state(pipe, ctx->vs); - pipe->bind_fs_state(pipe, ctx->fs); -#if 0 - pipe->set_viewport_state(pipe, &ctx->viewport); -#endif + cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* init framebuffer state */ memset(&fb, 0, sizeof(fb)); @@ -926,11 +910,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, ctx->sampler.lod_bias = (float) srcLevel; cso_single_sampler(ctx->cso, 0, &ctx->sampler); cso_single_sampler_done(ctx->cso); -#if 0 - simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); -#endif - pipe->set_sampler_textures(pipe, 1, &pt); + cso_set_sampler_textures(ctx->cso, 1, &pt); /* quad coords in window coords (bypassing clipping, viewport mapping) */ set_vertex_data(ctx, @@ -954,4 +935,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_restore_samplers(ctx->cso); cso_restore_sampler_textures(ctx->cso); cso_restore_framebuffer(ctx->cso); + cso_restore_fragment_shader(ctx->cso); + cso_restore_vertex_shader(ctx->cso); + cso_restore_viewport(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index bd9af54fb7..3277024f07 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -31,6 +31,11 @@ #include "pipe/p_state.h" +#ifdef __cplusplus +extern "C" { +#endif + + struct pipe_context; struct pipe_texture; struct cso_context; @@ -52,4 +57,9 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, uint face, uint baseLevel, uint lastLevel, uint filter); + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index cd13823985..0b917c005f 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -40,6 +40,94 @@ /** + * Pack ubyte R,G,B,A into dest pixel. + */ +static INLINE void +util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, + enum pipe_format format, void *dest) +{ + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | a; + } + return; + case PIPE_FORMAT_R8G8B8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + } + return; + case PIPE_FORMAT_A8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (a << 24) | (r << 16) | (g << 8) | b; + } + return; + case PIPE_FORMAT_X8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (0xff << 24) | (r << 16) | (g << 8) | b; + } + return; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | a; + } + return; + case PIPE_FORMAT_B8G8R8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + } + return; + case PIPE_FORMAT_R5G6B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + } + return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + } + return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + } + return; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *d = (float *) dest; + d[0] = (float)r / 255.0f; + d[1] = (float)g / 255.0f; + d[2] = (float)b / 255.0f; + d[3] = (float)a / 255.0f; + } + return; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + float *d = (float *) dest; + d[0] = (float)r / 255.0f; + d[1] = (float)g / 255.0f; + d[2] = (float)b / 255.0f; + } + return; + + /* XXX lots more cases to add */ + default: + debug_print_format("gallium: unhandled format in util_pack_color_ub()", format); + assert(0); + } +} + + +/** * Note rgba outside [0,1] will be clamped for int pixel formats. */ static INLINE void @@ -62,24 +150,54 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) *d = (r << 24) | (g << 16) | (b << 8) | a; } return; + case PIPE_FORMAT_R8G8B8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + } + return; case PIPE_FORMAT_A8R8G8B8_UNORM: { uint *d = (uint *) dest; *d = (a << 24) | (r << 16) | (g << 8) | b; } return; + case PIPE_FORMAT_X8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (0xff << 24) | (r << 16) | (g << 8) | b; + } + return; case PIPE_FORMAT_B8G8R8A8_UNORM: { uint *d = (uint *) dest; *d = (b << 24) | (g << 16) | (r << 8) | a; } return; + case PIPE_FORMAT_B8G8R8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + } + return; case PIPE_FORMAT_R5G6B5_UNORM: { ushort *d = (ushort *) dest; *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + } + return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + } + return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *d = (float *) dest; @@ -99,7 +217,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) return; /* XXX lots more cases to add */ default: - debug_printf("gallium: unhandled format in util_pack_color()"); + debug_print_format("gallium: unhandled format in util_pack_color()", format); + assert(0); } } @@ -120,11 +239,14 @@ util_pack_z(enum pipe_format format, double z) else return (uint) (z * 0xffffffff); case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: return (uint) (z * 0xffffff); case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: return ((uint) (z * 0xffffff)) << 8; default: - debug_printf("gallium: unhandled fomrat in util_pack_z()"); + debug_print_format("gallium: unhandled format in util_pack_z()", format); + assert(0); return 0; } } diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c index 48426abcb7..c4f4bbd30c 100644 --- a/src/gallium/auxiliary/util/u_snprintf.c +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -166,8 +166,8 @@ #include <config.h> #else #ifdef WIN32 -#define vsnprintf rpl_vsnprintf -#define snprintf rpl_snprintf +#define vsnprintf util_vsnprintf +#define snprintf util_snprintf #define HAVE_VSNPRINTF 0 #define HAVE_SNPRINTF 0 #define HAVE_VASPRINTF 1 /* not needed */ @@ -445,7 +445,7 @@ static UINTMAX_T myround(LDOUBLE); static LDOUBLE mypow10(int); int -rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) +util_vsnprintf(char *str, size_t size, const char *format, va_list args) { LDOUBLE fvalue; INTMAX_T value; @@ -1404,7 +1404,7 @@ mymemcpy(void *dst, void *src, size_t len) #endif /* NEED_MYMEMCPY */ int -rpl_vasprintf(char **ret, const char *format, va_list ap) +util_vasprintf(char **ret, const char *format, va_list ap) { size_t size; int len; @@ -1422,10 +1422,10 @@ rpl_vasprintf(char **ret, const char *format, va_list ap) #if !HAVE_SNPRINTF #if HAVE_STDARG_H int -rpl_snprintf(char *str, size_t size, const char *format, ...) +util_snprintf(char *str, size_t size, const char *format, ...) #else int -rpl_snprintf(va_alist) va_dcl +util_snprintf(va_alist) va_dcl #endif /* HAVE_STDARG_H */ { #if !HAVE_STDARG_H @@ -1449,10 +1449,10 @@ rpl_snprintf(va_alist) va_dcl #if !HAVE_ASPRINTF #if HAVE_STDARG_H int -rpl_asprintf(char **ret, const char *format, ...) +util_asprintf(char **ret, const char *format, ...) #else int -rpl_asprintf(va_alist) va_dcl +util_asprintf(va_alist) va_dcl #endif /* HAVE_STDARG_H */ { #if !HAVE_STDARG_H diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h new file mode 100644 index 0000000000..b99d4e8021 --- /dev/null +++ b/src/gallium/auxiliary/util/u_string.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Platform independent functions for string manipulation. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_STRING_H_ +#define U_STRING_H_ + +#ifndef WIN32 +#include <stdio.h> +#endif +#include <stddef.h> +#include <stdarg.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef WIN32 +int util_vsnprintf(char *, size_t, const char *, va_list); +int util_snprintf(char *str, size_t size, const char *format, ...); +#else +#define util_vsnprintf vsnprintf +#define util_snprintf snprintf +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_STRING_H_ */ diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c new file mode 100644 index 0000000000..9b97050d51 --- /dev/null +++ b/src/gallium/auxiliary/util/u_time.c @@ -0,0 +1,162 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "util/u_time.h" + +#if defined(PIPE_OS_LINUX) +#include <sys/time.h> +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) +#include <windows.h> +#include <winddi.h> +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) +#include <windows.h> +#else +#error Unsupported OS +#endif + + +#if defined(PIPE_OS_WINDOWS) +static LONGLONG frequency = 0; +#if !defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) +#define EngQueryPerformanceFrequency(p) QueryPerformanceFrequency((LARGE_INTEGER*)(p)) +#define EngQueryPerformanceCounter(p) QueryPerformanceCounter((LARGE_INTEGER*)(p)) +#endif +#endif + + +void +util_time_get(struct util_time *t) +{ +#if defined(PIPE_OS_LINUX) + gettimeofday(&t->tv, NULL); +#elif defined(PIPE_OS_WINDOWS) + EngQueryPerformanceCounter(&t->counter); +#endif +} + + +void +util_time_add(const struct util_time *t1, + int64_t usecs, + struct util_time *t2) +{ +#if defined(PIPE_OS_LINUX) + t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; + t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; +#elif defined(PIPE_OS_WINDOWS) + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + t2->counter = t1->counter + (usecs * frequency + 999999LL)/1000000LL; +#endif +} + + +int64_t +util_time_diff(const struct util_time *t1, + const struct util_time *t2) +{ +#if defined(PIPE_OS_LINUX) + return (t2->tv.tv_usec - t1->tv.tv_usec) + + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; +#elif defined(PIPE_OS_WINDOWS) + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + return (t2->counter - t1->counter)*1000000LL/frequency; +#endif +} + + +/** + * Compare two time values. + * + * Not publicly available because it does not take in account wrap-arounds. + * Use util_time_timeout instead. + */ +static INLINE int +util_time_compare(const struct util_time *t1, + const struct util_time *t2) +{ +#if defined(PIPE_OS_LINUX) + if (t1->tv.tv_sec < t2->tv.tv_sec) + return -1; + else if(t1->tv.tv_sec > t2->tv.tv_sec) + return 1; + else if (t1->tv.tv_usec < t2->tv.tv_usec) + return -1; + else if(t1->tv.tv_usec > t2->tv.tv_usec) + return 1; + else + return 0; +#elif defined(PIPE_OS_WINDOWS) + if (t1->counter < t2->counter) + return -1; + else if(t1->counter > t2->counter) + return 1; + else + return 0; +#endif +} + + +boolean +util_time_timeout(const struct util_time *start, + const struct util_time *end, + const struct util_time *curr) +{ + if(util_time_compare(start, end) <= 0) + return !(util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0); + else + return !(util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0); +} + + +#if defined(PIPE_OS_WINDOWS) +void util_time_sleep(unsigned usecs) +{ + LONGLONG start, curr, end; + + EngQueryPerformanceCounter(&start); + + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + + end = start + (usecs * frequency + 999999LL)/1000000LL; + + do { + EngQueryPerformanceCounter(&curr); + } while(start <= curr && curr < end || + end < start && (curr < end || start <= curr)); +} +#endif diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h new file mode 100644 index 0000000000..48ec7a4a96 --- /dev/null +++ b/src/gallium/auxiliary/util/u_time.h @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_TIME_H_ +#define U_TIME_H_ + + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <time.h> /* timeval */ +#include <unistd.h> /* usleep */ +#endif + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Time abstraction. + * + * Do not access this structure directly. Use the provided function instead. + */ +struct util_time +{ +#if defined(PIPE_OS_LINUX) + struct timeval tv; +#else + long long counter; +#endif +}; + + +void +util_time_get(struct util_time *t); + +void +util_time_add(const struct util_time *t1, + int64_t usecs, + struct util_time *t2); + +int64_t +util_time_diff(const struct util_time *t1, + const struct util_time *t2); + +/** + * Returns non-zero when the timeout expires. + */ +boolean +util_time_timeout(const struct util_time *start, + const struct util_time *end, + const struct util_time *curr); + +#if defined(PIPE_OS_LINUX) +#define util_time_sleep usleep +#else +void +util_time_sleep(unsigned usecs); +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_TIME_H_ */ |