diff options
author | Ben Skeggs <skeggsb@gmail.com> | 2008-02-27 00:34:31 +1100 |
---|---|---|
committer | Ben Skeggs <skeggsb@gmail.com> | 2008-02-27 00:34:31 +1100 |
commit | 68ef52886263690632552ae187a4673945c2ab74 (patch) | |
tree | 0453c0063397c196ebe5e3dcd4d9c91392496d77 /src/gallium/auxiliary | |
parent | 026e2fd3c6eb87a010a9c90341e8a77b09376b5b (diff) | |
parent | ad6bb870de6103ed240fa1f9f828bd13a4401a9a (diff) |
Merge branch 'upstream-gallium-0.1' into nouveau-gallium-0.1
Diffstat (limited to 'src/gallium/auxiliary')
19 files changed, 647 insertions, 161 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 9e77e0774d..b427b509f8 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -28,9 +28,22 @@ /* Authors: Zack Rusin <zack@tungstengraphics.com> */ +#include "pipe/p_util.h" + #include "cso_cache.h" #include "cso_hash.h" + +struct cso_cache { + struct cso_hash *blend_hash; + struct cso_hash *depth_stencil_hash; + struct cso_hash *fs_hash; + struct cso_hash *vs_hash; + struct cso_hash *rasterizer_hash; + struct cso_hash *sampler_hash; + int max_size; +}; + #if 1 static unsigned hash_key(const void *key, unsigned key_size) { @@ -114,12 +127,106 @@ static int _cso_size_for_type(enum cso_cache_type type) return 0; } + +static void delete_blend_state(void *state, void *data) +{ + struct cso_blend *cso = (struct cso_blend *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_depth_stencil_state(void *state, void *data) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_sampler_state(void *state, void *data) +{ + struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_rasterizer_state(void *state, void *data) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_fs_state(void *state, void *data) +{ + struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_vs_state(void *state, void *data) 
+{ + struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + + +static INLINE void delete_cso(void *state, enum cso_cache_type type) +{ + switch (type) { + case CSO_BLEND: { + delete_blend_state(state, 0); + } + break; + case CSO_SAMPLER: { + delete_sampler_state(state, 0); + } + break; + case CSO_DEPTH_STENCIL_ALPHA: { + delete_depth_stencil_state(state, 0); + } + break; + case CSO_RASTERIZER: { + delete_rasterizer_state(state, 0); + } + break; + case CSO_FRAGMENT_SHADER: { + delete_fs_state(state, 0); + } + break; + case CSO_VERTEX_SHADER: { + delete_vs_state(state, 0); + } + break; + } + FREE(state); +} + +static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, + int max_size) +{ + /* if we're approaching the maximum size, remove a fourth of the entries, + * otherwise every subsequent call will go through the same */ + int max_entries = (max_size > cso_hash_size(hash)) ? 
max_size : cso_hash_size(hash); + int to_remove = (max_size < max_entries) * max_entries/4; + while (to_remove) { + /*remove elements until we're good */ + /*fixme: currently we pick the nodes to remove at random*/ + struct cso_hash_iter iter = cso_hash_first_node(hash); + void *cso = cso_hash_take(hash, cso_hash_iter_key(iter)); + delete_cso(cso, type); + --to_remove; + } +} + struct cso_hash_iter cso_insert_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *state) { struct cso_hash *hash = _cso_hash_for_type(sc, type); + sanitize_hash(hash, type, sc->max_size); + return cso_hash_insert(hash, hash_key, state); } @@ -132,6 +239,26 @@ cso_find_state(struct cso_cache *sc, return cso_hash_find(hash, hash_key); } + +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ) +{ + struct cso_hash_iter iter = cso_hash_find(hash, hash_key); + while (!cso_hash_iter_is_null(iter)) { + void *iter_data = cso_hash_iter_data(iter); + if (!memcmp(iter_data, templ, size)) { + /* Return the payload: + */ + return (unsigned char *)iter_data + size; + } + iter = cso_hash_iter_next(iter); + } + return NULL; +} + + struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *templ) @@ -156,8 +283,9 @@ void * cso_take_state(struct cso_cache *sc, struct cso_cache *cso_cache_create(void) { - struct cso_cache *sc = malloc(sizeof(struct cso_cache)); + struct cso_cache *sc = MALLOC_STRUCT(cso_cache); + sc->max_size = 4096; sc->blend_hash = cso_hash_create(); sc->sampler_hash = cso_hash_create(); sc->depth_stencil_hash = cso_hash_create(); @@ -168,14 +296,78 @@ struct cso_cache *cso_cache_create(void) return sc; } +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + cso_state_callback func, void *user_data) +{ + struct cso_hash *hash = 0; + struct cso_hash_iter iter; + + switch (type) { + case CSO_BLEND: + hash = sc->blend_hash; + 
break; + case CSO_SAMPLER: + hash = sc->sampler_hash; + break; + case CSO_DEPTH_STENCIL_ALPHA: + hash = sc->depth_stencil_hash; + break; + case CSO_RASTERIZER: + hash = sc->rasterizer_hash; + break; + case CSO_FRAGMENT_SHADER: + hash = sc->fs_hash; + break; + case CSO_VERTEX_SHADER: + hash = sc->vs_hash; + break; + } + + iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + void *state = cso_hash_iter_data(iter); + if (state) { + func(state, user_data); + } + iter = cso_hash_iter_next(iter); + } +} + void cso_cache_delete(struct cso_cache *sc) { assert(sc); + /* delete driver data */ + cso_for_each_state(sc, CSO_BLEND, delete_blend_state, 0); + cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0); + cso_for_each_state(sc, CSO_FRAGMENT_SHADER, delete_fs_state, 0); + cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0); + cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0); + cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0); + cso_hash_delete(sc->blend_hash); cso_hash_delete(sc->sampler_hash); cso_hash_delete(sc->depth_stencil_hash); cso_hash_delete(sc->rasterizer_hash); cso_hash_delete(sc->fs_hash); cso_hash_delete(sc->vs_hash); - free(sc); + FREE(sc); } + +void cso_set_maximum_cache_size(struct cso_cache *sc, int number) +{ + sc->max_size = number; + + sanitize_hash(sc->blend_hash, CSO_BLEND, sc->max_size); + sanitize_hash(sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA, + sc->max_size); + sanitize_hash(sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size); + sanitize_hash(sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); + sanitize_hash(sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); + sanitize_hash(sc->sampler_hash, CSO_SAMPLER, sc->max_size); +} + +int cso_maximum_cache_size(const struct cso_cache *sc) +{ + return sc->max_size; +} + diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 116e2eaa2c..3b0fe100b8 100644 --- 
a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,9 +25,48 @@ * **************************************************************************/ - /* - * Authors: - * Zack Rusin <zack@tungstengraphics.com> + /** + * @file + * Constant State Object (CSO) cache. + * + * The basic idea is that the states are created via the + * create_state/bind_state/delete_state semantics. The driver is expected to + * perform as much of the Gallium state translation to whatever its internal + * representation is during the create call. Gallium then has a caching + * mechanism where it stores the created states. When the pipeline needs an + * actual state change, a bind call is issued. In the bind call the driver + * gets its already translated representation. + * + * Those semantics mean that the driver doesn't do the repeated translations + * of states on every frame, but only once, when a new state is actually + * created. + * + * Even on hardware that doesn't do any kind of state cache, it makes the + * driver look a lot neater, plus it avoids all the redundant state + * translations on every frame. 
+ * + * Currently our constant state objects are: + * - alpha test + * - blend + * - depth stencil + * - fragment shader + * - rasterizer (old setup) + * - sampler + * - vertex shader + * + * Things that are not constant state objects include: + * - blend_color + * - clip_state + * - clear_color_state + * - constant_buffer + * - feedback_state + * - framebuffer_state + * - polygon_stipple + * - scissor_state + * - texture_state + * - viewport_state + * + * @author Zack Rusin <zack@tungstengraphics.com> */ #ifndef CSO_CACHE_H @@ -36,46 +75,57 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" +/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures + * returned by value to be fully defined */ +#include "cso_hash.h" -struct cso_hash; -struct cso_cache { - struct cso_hash *blend_hash; - struct cso_hash *depth_stencil_hash; - struct cso_hash *fs_hash; - struct cso_hash *vs_hash; - struct cso_hash *rasterizer_hash; - struct cso_hash *sampler_hash; -}; +#ifdef __cplusplus +extern "C" { +#endif + +struct cso_cache; struct cso_blend { struct pipe_blend_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_depth_stencil_alpha { struct pipe_depth_stencil_alpha_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_rasterizer { struct pipe_rasterizer_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_fragment_shader { struct pipe_shader_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_vertex_shader { struct pipe_shader_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_sampler { struct pipe_sampler_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; @@ -88,6 +138,8 @@ enum cso_cache_type { CSO_VERTEX_SHADER }; +typedef void (*cso_state_callback)(void *, void *); + unsigned cso_construct_key(void *item, 
int item_size); struct cso_cache *cso_cache_create(void); @@ -101,7 +153,16 @@ struct cso_hash_iter cso_find_state(struct cso_cache *sc, struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *templ); +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + cso_state_callback func, void *user_data); void * cso_take_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type); +void cso_set_maximum_cache_size(struct cso_cache *sc, int number); +int cso_maximum_cache_size(const struct cso_cache *sc); + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index b40217c524..b3b4d667d2 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -30,12 +30,10 @@ * Zack Rusin <zack@tungstengraphics.com> */ -#include "cso_hash.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> +#include "cso_hash.h" #define MAX(a, b) ((a > b) ? (a) : (b)) @@ -98,7 +96,7 @@ struct cso_hash { static void *cso_data_allocate_node(struct cso_hash_data *hash) { - return malloc(hash->nodeSize); + return MALLOC(hash->nodeSize); } static void cso_data_free_node(struct cso_node *node) @@ -107,10 +105,10 @@ static void cso_data_free_node(struct cso_node *node) * Need to cast value ptr to original cso type, then free the * driver-specific data hanging off of it. 
For example: struct cso_sampler *csamp = (struct cso_sampler *) node->value; - free(csamp->data); + FREE(csamp->data); */ - free(node->value); - free(node); + FREE(node->value); + FREE(node); } static struct cso_node * @@ -134,7 +132,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) hint = countBits(-hint); if (hint < MinNumBits) hint = MinNumBits; - hash->userNumBits = hint; + hash->userNumBits = (short)hint; while (primeForNumBits(hint) < (hash->size >> 1)) ++hint; } else if (hint < MinNumBits) { @@ -147,9 +145,9 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) int oldNumBuckets = hash->numBuckets; int i = 0; - hash->numBits = hint; + hash->numBits = (short)hint; hash->numBuckets = primeForNumBits(hint); - hash->buckets = malloc(sizeof(struct cso_node*) * hash->numBuckets); + hash->buckets = MALLOC(sizeof(struct cso_node*) * hash->numBuckets); for (i = 0; i < hash->numBuckets; ++i) hash->buckets[i] = e; @@ -173,7 +171,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) firstNode = afterLastNode; } } - free(oldBuckets); + FREE(oldBuckets); } } @@ -235,13 +233,13 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, struct cso_hash * cso_hash_create(void) { - struct cso_hash *hash = malloc(sizeof(struct cso_hash)); - hash->data.d = malloc(sizeof(struct cso_hash_data)); + struct cso_hash *hash = MALLOC_STRUCT(cso_hash); + hash->data.d = MALLOC_STRUCT(cso_hash_data); hash->data.d->fakeNext = 0; hash->data.d->buckets = 0; hash->data.d->size = 0; hash->data.d->nodeSize = sizeof(struct cso_node); - hash->data.d->userNumBits = MinNumBits; + hash->data.d->userNumBits = (short)MinNumBits; hash->data.d->numBits = 0; hash->data.d->numBuckets = 0; @@ -261,9 +259,9 @@ void cso_hash_delete(struct cso_hash *hash) cur = next; } } - free(hash->data.d->buckets); - free(hash->data.d); - free(hash); + FREE(hash->data.d->buckets); + FREE(hash->data.d); + FREE(hash); } struct cso_hash_iter cso_hash_find(struct 
cso_hash *hash, @@ -301,7 +299,7 @@ static struct cso_node *cso_hash_data_next(struct cso_node *node) a.next = node->next; if (!a.next) { - fprintf(stderr, "iterating beyond the last element\n"); + debug_printf("iterating beyond the last element\n"); return 0; } if (a.next->next) @@ -352,7 +350,7 @@ static struct cso_node *cso_hash_data_prev(struct cso_node *node) --bucket; --start; } - fprintf(stderr, "iterating backward beyond first element\n"); + debug_printf("iterating backward beyond first element\n"); return a.e; } @@ -397,3 +395,8 @@ struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash) struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)}; return iter; } + +int cso_hash_size(struct cso_hash *hash) +{ + return hash->data.d->size; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index b4aa111860..d5bca9d591 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -33,6 +33,11 @@ #ifndef CSO_HASH_H #define CSO_HASH_H + +#ifdef __cplusplus +extern "C" { +#endif + struct cso_hash; struct cso_node; @@ -42,7 +47,9 @@ struct cso_hash_iter { }; struct cso_hash *cso_hash_create(void); -void cso_hash_delete(struct cso_hash *hash); +void cso_hash_delete(struct cso_hash *hash); + +int cso_hash_size(struct cso_hash *hash); struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key, void *data); @@ -59,4 +66,17 @@ void *cso_hash_iter_data(struct cso_hash_iter iter); struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter); struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); + +/* KW: a convenience routine: + */ +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ); + + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 8e3a8caa74..3302dc44f7 100644 --- 
a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -3,6 +3,8 @@ Import('*') draw = env.ConvenienceLibrary( target = 'draw', source = [ + 'draw_aaline.c', + 'draw_aapoint.c', 'draw_clip.c', 'draw_vs_exec.c', 'draw_vs_sse.c', @@ -13,6 +15,7 @@ draw = env.ConvenienceLibrary( 'draw_flatshade.c', 'draw_offset.c', 'draw_prim.c', + 'draw_pstipple.c', 'draw_stipple.c', 'draw_twoside.c', 'draw_unfilled.c', diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 43119cc70b..cae6fcd4d2 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -222,7 +222,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, * * Temp reg0 usage: * t0.x = distance of fragment from center point - * t0.y = boolean, is t0.x > 1 ? + * t0.y = boolean, is t0.x > 1.0, also misc temp usage * t0.z = temporary for computing 1/(1-k) value * t0.w = final coverage value */ @@ -313,9 +313,73 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; ctx->emit_instruction(ctx, &newInst); - /* SGT t0.y, t0.x, tex.z; # bool b = distance > k */ + + /* compute coverage factor = (1-d)/(1-k) */ + + /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + 
newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.z, t0.z; # t0.z = 1 / m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SUB t0.y, 1, t0.x; # d = 1 - d */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + ctx->emit_instruction(ctx, &newInst); + + /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + 
newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SLE; newInst.Instruction.NumDstRegs = 1; newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; newInst.FullDstRegisters[0].DstRegister.Index = tmp0; @@ -329,111 +393,40 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); - /* IF t0.y # if b then */ + /* CMP t0.w, -t0.y, tex.w, t0.w; + * # if -t0.y < 0 then + * t0.w = 1 + * else + * t0.w = t0.w + */ newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_IF; - newInst.Instruction.NumDstRegs = 0; - newInst.Instruction.NumSrcRegs = 1; + newInst.Instruction.Opcode = TGSI_OPCODE_CMP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 3; newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; 
newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); - { - /* compute coverage factor = (1-d)/(1-k) */ - - /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SUB; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - - /* RCP t0.z, t0.z; # t0.z = 1 / m */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_RCP; - 
newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - - /* SUB t0.x, 1, t0.x; # d = 1 - d */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SUB; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - ctx->emit_instruction(ctx, &newInst); - - /* MUL t0.w, t0.x, t0.z; # coverage = d * m */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MUL; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - 
newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - } - - /* ELSE */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_ELSE; - newInst.Instruction.NumDstRegs = 0; - newInst.Instruction.NumSrcRegs = 0; - ctx->emit_instruction(ctx, &newInst); - - { - /* MOV t0.w, tex.w; # coverage = 1.0 */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MOV; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - ctx->emit_instruction(ctx, &newInst); - } - - /* ENDIF */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_ENDIF; - newInst.Instruction.NumDstRegs = 0; - newInst.Instruction.NumSrcRegs = 0; - ctx->emit_instruction(ctx, &newInst); } if (inst->Instruction.Opcode == TGSI_OPCODE_END) { @@ -516,7 +509,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) (struct tgsi_token *) aapoint_fs.tokens, MAX, &transform.base); -#if 0 /* DEBUG */ +#if 1 /* DEBUG */ tgsi_dump(orig_fs->tokens, 0); tgsi_dump(aapoint_fs.tokens, 0); #endif @@ -613,13 +606,16 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header) * ELSE * coverage = 1.0; // full coverage * ENDIF + * + * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to + * avoid using IF/ELSE/ENDIF TGSI opcodes. 
*/ #if !NORMALIZE - k = 1.0 / radius; - k = 1.0 - 2.0 * k + k * k; + k = 1.0f / radius; + k = 1.0f - 2.0f * k + k * k; #else - k = 1.0 - 1.0 / radius; + k = 1.0f - 1.0f / radius; #endif /* allocate/dup new verts */ diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index c28e78d33a..7dd1c6f6fa 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -244,14 +244,32 @@ draw_convert_wide_lines(struct draw_context *draw, boolean enable) /** - * The draw module may sometimes generate vertices with extra attributes - * (such as texcoords for AA lines). The driver can call this function - * to find those attributes. + * Ask the draw module for the location/slot of the given vertex attribute in + * a post-transformed vertex. + * + * With this function, drivers that use the draw module should have no reason + * to track the current vertex shader. + * + * Note that the draw module may sometimes generate vertices with extra + * attributes (such as texcoords for AA lines). The driver can call this + * function to find those attributes. + * + * Zero is returned if the attribute is not found since this is + * a don't care / undefined situation. Returning -1 would be a bit more + * work for the drivers. */ int draw_find_vs_output(struct draw_context *draw, uint semantic_name, uint semantic_index) { + const struct pipe_shader_state *vs = draw->vertex_shader->state; + uint i; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == semantic_name && + vs->output_semantic_index[i] == semantic_index) + return i; + } + /* XXX there may be more than one extra vertex attrib. * For example, simulated gl_FragCoord and gl_PointCoord. 
*/ diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 4048abf856..1ab04cd959 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -133,7 +133,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, pctx->maxSampler = (int) decl->u.DeclarationRange.Last; } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - pctx->maxInput = MAX2(pctx->maxInput, decl->u.DeclarationRange.Last); + pctx->maxInput = MAX2(pctx->maxInput, (int) decl->u.DeclarationRange.Last); if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) pctx->wincoordInput = (int) decl->u.DeclarationRange.First; } @@ -332,7 +332,7 @@ generate_pstip_fs(struct pstip_stage *pstip) if (transform.wincoordInput < 0) { pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; - pstip_fs.input_semantic_index[pstip_fs.num_inputs] = transform.maxInput; + pstip_fs.input_semantic_index[pstip_fs.num_inputs] = (ubyte)transform.maxInput; pstip_fs.num_inputs++; } diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index efd6793f2b..3a19dd4cd7 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -45,6 +45,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) struct draw_stage *next = draw->pipeline.rasterize; int need_det = 0; int precalc_flat = 0; + boolean wide_lines, wide_points; /* Set the validate's next stage to the rasterize stage, so that it * can be found later if needed for flushing. @@ -68,9 +69,18 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.aapoint; } - if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines - && !draw->rasterizer->line_smooth) || - (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) || + /* drawing wide lines? 
*/ + wide_lines = (draw->rasterizer->line_width != 1.0 + && draw->convert_wide_lines + && !draw->rasterizer->line_smooth); + + /* drawing large points? */ + wide_points = (draw->rasterizer->point_size != 1.0 + && draw->convert_wide_points + && !draw->pipeline.aapoint); + + if (wide_lines || + wide_points || draw->rasterizer->point_sprite) { draw->pipeline.wide->next = next; next = draw->pipeline.wide; diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 97beb5f72a..f5b5f4052f 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -166,6 +166,7 @@ static INLINE void pb_destroy(struct pb_buffer *buf) { assert(buf); + assert(buf->vtbl); buf->vtbl->destroy(buf); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index e2ee72ed1f..6e217eb2e0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -285,7 +285,9 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) /* Wait on outstanding fences */ while (fenced_list->numDelayed) { _glthread_UNLOCK_MUTEX(fenced_list->mutex); +#ifndef __MSC__ sched_yield(); +#endif _fenced_buffer_list_check_free(fenced_list, 1); _glthread_LOCK_MUTEX(fenced_list->mutex); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index ff4fd123f3..66256f3fa7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -192,8 +192,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, } /* Some sanity checks */ - assert(0 <= mm_buf->block->ofs && mm_buf->block->ofs < mm->size); - assert(size <= mm_buf->block->size && mm_buf->block->ofs + mm_buf->block->size <= mm->size); + assert(0 <= (unsigned)mm_buf->block->ofs && (unsigned)mm_buf->block->ofs < mm->size); + assert(size 
<= (unsigned)mm_buf->block->size && (unsigned)mm_buf->block->ofs + (unsigned)mm_buf->block->size <= mm->size); _glthread_UNLOCK_MUTEX(mm->mutex); return SUPER(mm_buf); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index dcbf76f600..4d33950e99 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -25,6 +25,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" +#include "pipe/p_pointer.h" #include "rtasm_execmem.h" #include "rtasm_x86sse.h" @@ -34,7 +35,7 @@ static unsigned char *cptr( void (*label)() ) { - return (unsigned char *)(unsigned long)label; + return (unsigned char *) label; } @@ -46,7 +47,7 @@ static void do_realloc( struct x86_function *p ) p->csr = p->store; } else { - unsigned used = p->csr - p->store; + uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); unsigned char *tmp = p->store; p->size *= 2; p->store = rtasm_exec_malloc(p->size); @@ -60,7 +61,7 @@ static void do_realloc( struct x86_function *p ) */ static unsigned char *reserve( struct x86_function *p, int bytes ) { - if (p->csr + bytes - p->store > p->size) + if (p->csr + bytes - p->store > (int) p->size) do_realloc(p); { @@ -135,7 +136,7 @@ static void emit_modrm( struct x86_function *p, case mod_INDIRECT: break; case mod_DISP8: - emit_1b(p, regmem.disp); + emit_1b(p, (char) regmem.disp); break; case mod_DISP32: emit_1i(p, regmem.disp); @@ -251,14 +252,14 @@ void x86_jcc( struct x86_function *p, enum x86_cc cc, unsigned char *label ) { - int offset = label - (x86_get_label(p) + 2); + intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2); if (offset <= 127 && offset >= -128) { emit_1ub(p, 0x70 + cc); emit_1b(p, (char) offset); } else { - offset = label - (x86_get_label(p) + 6); + offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6); emit_2ub(p, 0x0f, 0x80 + cc); emit_1i(p, offset); } @@ 
-293,13 +294,13 @@ unsigned char *x86_call_forward( struct x86_function *p) void x86_fixup_fwd_jump( struct x86_function *p, unsigned char *fixup ) { - *(int *)(fixup - 4) = x86_get_label(p) - fixup; + *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup ); } void x86_jmp( struct x86_function *p, unsigned char *label) { emit_1ub(p, 0xe9); - emit_1i(p, label - x86_get_label(p) - 4); + emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); } #if 0 @@ -1207,7 +1208,7 @@ void (*x86_get_func( struct x86_function *p ))(void) { if (DISASSEM && p->store) debug_printf("disassemble %p %p\n", p->store, p->csr); - return (void (*)(void)) (unsigned long) p->store; + return (void (*)(void)) p->store; } #else diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 71f64b747c..5555639b70 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ util/tgsi_build.c \ util/tgsi_dump.c \ util/tgsi_parse.c \ + util/tgsi_scan.c \ util/tgsi_transform.c \ util/tgsi_util.c diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 8464bfe944..4632dcc072 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -8,6 +8,8 @@ tgsi = env.ConvenienceLibrary( 'util/tgsi_build.c', 'util/tgsi_dump.c', 'util/tgsi_parse.c', + 'util/tgsi_scan.c', + 'util/tgsi_transform.c', 'util/tgsi_util.c', ]) diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index d7b18dc9c5..ac52441400 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -2455,7 +2455,7 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { - assert(pc < mach->NumInstructions); + assert(pc < (int) mach->NumInstructions); 
exec_instruction( mach, mach->Instructions + pc, &pc ); } diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index b5c54847e0..ff74e6117c 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -299,7 +299,8 @@ static const char *TGSI_SEMANTICS[] = "SEMANTIC_BCOLOR", "SEMANTIC_FOG", "SEMANTIC_PSIZE", - "SEMANTIC_GENERIC," + "SEMANTIC_GENERIC", + "SEMANTIC_NORMAL" }; static const char *TGSI_SEMANTICS_SHORT[] = @@ -310,6 +311,7 @@ static const char *TGSI_SEMANTICS_SHORT[] = "FOG", "PSIZE", "GENERIC", + "NORMAL" }; static const char *TGSI_IMMS[] = diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c new file mode 100644 index 0000000000..4b99ac37cc --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI program scan utility. + * Used to determine which registers and instructions are used by a shader. + * + * Authors: Brian Paul + */ + + +#include "tgsi_scan.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" + + + + +/** + */ +void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info) +{ + uint procType; + struct tgsi_parse_context parse; + + memset(info, 0, sizeof(*info)); + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n"); + return; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); + info->opcode_count[fullinst->Instruction.Opcode]++; + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + uint file = fulldecl->Declaration.File; + uint i; + for (i = fulldecl->u.DeclarationRange.First; + i <= fulldecl->u.DeclarationRange.Last; + i++) { + info->file_mask[file] |= (1 << i); + info->file_count[file]++; + + /* special case */ + if (procType == 
TGSI_PROCESSOR_FRAGMENT && + file == TGSI_FILE_OUTPUT && + fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + info->writes_z = TRUE; + } + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + info->immediate_count++; + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free (&parse); +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h new file mode 100644 index 0000000000..757446437c --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TGSI_SCAN_H +#define TGSI_SCAN_H + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + + +/** + * Shader summary info + */ +struct tgsi_shader_info +{ + uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ + uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ + + uint immediate_count; /**< number of immediates declared */ + + uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ + + boolean writes_z; /**< does fragment shader write Z value? */ +}; + + +extern void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info); + + +#endif /* TGSI_SCAN_H */ |